{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "820EGXmsTeQ1"
      },
      "source": [
        "Licensed under the Apache License, Version 2.0 (the \"License\");"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FWZ1wUyvL8B5"
      },
      "outputs": [],
      "source": [
        "import collections\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "import pandas as pd\n",
        "import numpy as np"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qYEcJz6FTOwT"
      },
      "source": [
        "# Read the Human Proteome"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xBZeAp_QMHPE",
        "outputId": "57e3d262-07c6-495e-c0a8-12f2fd13acfd"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2021-06-07 19:25:35--  https://www.uniprot.org/uniprot/?query=reviewed%3Ayes+AND+proteome%3Aup000005640\u0026format=fasta\n",
            "Resolving www.uniprot.org (www.uniprot.org)... 193.62.192.81\n",
            "Connecting to www.uniprot.org (www.uniprot.org)|193.62.192.81|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 \n",
            "Length: unspecified [text/plain]\n",
            "Saving to: ‘uniprot.fasta’\n",
            "\n",
            "uniprot.fasta           [     \u003c=\u003e            ]  12.98M  1.48MB/s    in 8.2s    \n",
            "\n",
            "2021-06-07 19:25:43 (1.58 MB/s) - ‘uniprot.fasta’ saved [13606528]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Download from uniprot: https://www.uniprot.org/help/human_proteome\n",
        "!wget -O uniprot.fasta \"https://www.uniprot.org/uniprot/?query=reviewed%3Ayes+AND+proteome%3Aup000005640\u0026format=fasta\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zdUTQDinS6J8",
        "outputId": "dd700158-2b9f-4655-f087-e8ebe91911f7"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Read 20380 entries from uniprot.fasta\n"
          ]
        }
      ],
      "source": [
        "def fasta_iterator(file_handle):\n",
        "  partial_sequence = ''\n",
        "  line = file_handle.readline()\n",
        "  while line:\n",
        "    if line.startswith('\u003e'):\n",
        "      if partial_sequence:\n",
        "        yield partial_sequence\n",
        "        partial_sequence = ''\n",
        "    else:\n",
        "      partial_sequence += line.strip()\n",
        "    line = file_handle.readline()\n",
        "  if partial_sequence:\n",
        "    yield partial_sequence\n",
        "\n",
        "def read_seqs_from_fasta(filepath):\n",
        "  all_seqs = []\n",
        "  with open(filepath, 'rt') as f:\n",
        "    for seq in fasta_iterator(f):\n",
        "      all_seqs.append(seq)\n",
        "  print('Read %d entries from %s' % (len(all_seqs), filepath))\n",
        "  return all_seqs\n",
        "\n",
        "full_seqs = read_seqs_from_fasta('uniprot.fasta')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "47iIqM963Gm3"
      },
      "source": [
        "#LysC digestion"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "ALTXEFni2t1z",
        "outputId": "c2ac415a-9c68-43ff-da32-a2f8f726f458"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\u003cdiv\u003e\n",
              "\u003cstyle scoped\u003e\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "\u003c/style\u003e\n",
              "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n",
              "  \u003cthead\u003e\n",
              "    \u003ctr style=\"text-align: right;\"\u003e\n",
              "      \u003cth\u003e\u003c/th\u003e\n",
              "      \u003cth\u003eprotein_num\u003c/th\u003e\n",
              "      \u003cth\u003efragment_num\u003c/th\u003e\n",
              "      \u003cth\u003eraw_fragment\u003c/th\u003e\n",
              "      \u003cth\u003efragment_len\u003c/th\u003e\n",
              "    \u003c/tr\u003e\n",
              "  \u003c/thead\u003e\n",
              "  \u003ctbody\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e0\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003eMWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTL...\u003c/td\u003e\n",
              "      \u003ctd\u003e81\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e1\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e1\u003c/td\u003e\n",
              "      \u003ctd\u003eEITK\u003c/td\u003e\n",
              "      \u003ctd\u003e4\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e2\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "      \u003ctd\u003eHWEWLENNLLQTLSIFDSEEDITTFVK\u003c/td\u003e\n",
              "      \u003ctd\u003e27\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e3\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e3\u003c/td\u003e\n",
              "      \u003ctd\u003eGK\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e4\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e4\u003c/td\u003e\n",
              "      \u003ctd\u003eIHGIIAEENK\u003c/td\u003e\n",
              "      \u003ctd\u003e10\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e...\u003c/th\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670057\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e65\u003c/td\u003e\n",
              "      \u003ctd\u003eNEEDK\u003c/td\u003e\n",
              "      \u003ctd\u003e5\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670058\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e66\u003c/td\u003e\n",
              "      \u003ctd\u003eLK\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670059\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e67\u003c/td\u003e\n",
              "      \u003ctd\u003eDWEGGLDEQRLSADSGYIIPLPDIDPVPEEEDLGK\u003c/td\u003e\n",
              "      \u003ctd\u003e35\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670060\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e68\u003c/td\u003e\n",
              "      \u003ctd\u003eRNRHSSQTSEESAIETGSSSSTFIK\u003c/td\u003e\n",
              "      \u003ctd\u003e25\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670061\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e69\u003c/td\u003e\n",
              "      \u003ctd\u003eREDETIEDIDMMDDIGIDSSDLVEDSFL\u003c/td\u003e\n",
              "      \u003ctd\u003e28\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "  \u003c/tbody\u003e\n",
              "\u003c/table\u003e\n",
              "\u003cp\u003e670062 rows × 4 columns\u003c/p\u003e\n",
              "\u003c/div\u003e"
            ],
            "text/plain": [
              "        protein_num  ...  fragment_len\n",
              "0                 0  ...            81\n",
              "1                 0  ...             4\n",
              "2                 0  ...            27\n",
              "3                 0  ...             2\n",
              "4                 0  ...            10\n",
              "...             ...  ...           ...\n",
              "670057        20379  ...             5\n",
              "670058        20379  ...             2\n",
              "670059        20379  ...            35\n",
              "670060        20379  ...            25\n",
              "670061        20379  ...            28\n",
              "\n",
              "[670062 rows x 4 columns]"
            ]
          },
          "execution_count": 4,
          "metadata": {
            "tags": []
          },
          "output_type": "execute_result"
        }
      ],
      "source": [
        "LYSINE = 'K'\n",
        "\n",
        "def lys_c_digest(protein_sequence):\n",
        "  \"\"\"Return the pieces of a sequence after Lys-C digestion.\"\"\"\n",
        "  pieces = []\n",
        "  last_cut_pos = len(protein_sequence)\n",
        "  for i in reversed(range(len(protein_sequence) - 1)):\n",
        "    current_char = protein_sequence[i]\n",
        "\n",
        "    if current_char == LYSINE:\n",
        "      piece = protein_sequence[i + 1:last_cut_pos + 1]\n",
        "      pieces.append(piece)\n",
        "      last_cut_pos = i\n",
        "\n",
        "  if last_cut_pos \u003e= 0:\n",
        "    piece = protein_sequence[0:last_cut_pos + 1]\n",
        "    pieces.append(piece)\n",
        "\n",
        "  return list(reversed(pieces))\n",
        "\n",
        "def make_fragment_df(seqs):\n",
        "  tuples = []\n",
        "  for protein_num, seq in enumerate(seqs):\n",
        "    digested_fragments = lys_c_digest(seq)\n",
        "    for fragment_num, fragment in enumerate(digested_fragments):\n",
        "      tuples.append((protein_num, fragment_num, fragment))\n",
        "  df = pd.DataFrame.from_records(tuples, columns=('protein_num', 'fragment_num', 'raw_fragment'))\n",
        "  df['fragment_len'] = df['raw_fragment'].str.len()\n",
        "  return df\n",
        "\n",
        "full_fragment_df = make_fragment_df(full_seqs)\n",
        "full_fragment_df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 296
        },
        "id": "1R-biZad_f_J",
        "outputId": "8d635842-dfc8-4fa0-a296-b697c67498e7"
      },
      "outputs": [
        {
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ4AAAEXCAYAAACdwyIfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAd1klEQVR4nO3de7xVdZ3/8ddbECNNQSFGAcULZuY88kKKl/qZGqFTaWVepgRNxVInbbpoNb+fltNUj6ksp8QrAzrmJdKRGgwJr3kHNRW1kbwECIIiXrIy5PP7Y32PLI777LP34ezvPuz9fj4e+3HW+q7bd6292O+9vuvL2ooIzMzMctmg2RUwM7P24uAxM7OsHDxmZpaVg8fMzLJy8JiZWVYOHjMzy8rB02YkzZe0f7Pr0UySPi5poaRXJe3W7PrUQ9IoSSGpfxq/QdLEzHV4v6TfN2jd6+17Y7Vz8LQQSU9LOqhT2bGSftsxHhHviYhbulnPWh9uLej7wKkRsUlEPNB5Ytr3HRq1cUl7SpopaaWkFZLulXRcT9YVEQdHxLTermNZ5+MREbdHxLsatLmq7836pNHn0frMwWPZ9YFA2waY34wNS9obuAm4FdgB2AL4PHBwM+rTB3X53vSB88Z6iYOnzZSvitI377mSXpb0nKQfptluS39XpiaPvSVtIOlfJD0jaZmkyyRtVlrvhDTtBUn/t9N2zpY0XdJ/SXoZODZt+670rX+JpJ9IGlBaX0g6WdITkl6RdI6k7SXdmep7TXn+TvtYsa6SNpL0KtAP+J2kP9R4zP5O0muStiiV7S5puaQNJe0g6VZJL0l6XtLVVVb378C0iPheRDwfhXkRcUQX2+4n6ftpvU8C/9Bp+i2STijN+4M071OSTu3ULLeZpEvT8V4s6V8l9UvTKu6DpI5z4XfpXDhS0v6SFpXq8O5Uj5UqmnI/Vpo2VdJPJf1Peh/vkbR9hf2s+N6k8+gMSQ8Bf5LUX9KZkv6Q1veopI93Ol7VjsEtab/vTPvzS0lbSLoinVf3SRpVWt9OkmaruDL9vaQjStO63LcujtsQSb/Smivd2yW152dwRPjVIi/gaeCgTmXHAr+tNA9wF3BMGt4EGJuGRwEB9C8t91lgAbBdmvda4PI0bWfgVWA/YABFc8nfSts5O40fRvFlZyCwBzAW6J+29xhweml7AVwPbAq8B/grMCdtfzPgUWBiF8ehy7qW1r1DleP4lunATODzpfFzgf9Iw1cC30j79jZgvy7W+3bgDeCDdbynnwMeB0YCmwM3l98b4BbghNK8jwIjgMHAbzrNex1wIbAx8E7gXuCk7vah8/EA9gcWpeEN07H+enrvDwBeAd6Vpk8FXgD2TO/1FcBVtR57ivP1wbT/A1PZp4CtUl2PBP4EbFnjMbgl1Xf70nn0v8BBqX6XAf+Z5t0YWAgcl6btBjwP7FzLvlXYl+8AF6RjtiHwfkDN/txoxqvpFfCrF9/M4h/pq8DK0us1ug6e24BvAkM6rWcUbw2eOcDJpfF3UYRJf+D/AVeWpr0deJ21g+e2bup+OnBdaTyAfUvj84AzSuM/AH7Uxbq6rGtp3fUGz5HAHWm4H7AU2DONXwZcBIzoZh+Hp3XvVMd7ehPwudL4uAofpCeU5j2pNO9BHfMCwyjCe2Bp+tHAzd3tQ4UP0P1ZEzzvT8dig9L0K4Gz0/BU4JLStEOAx2s99ul8/Ww3x+hB4NDujkHpeH2j03l0Q2n8o8CDpff89k7buhA4q5Z9q7Av36L4MtXludcur/a8zGtth0XEoI4XcHKVeY8HdgQeT00MH6ky71bAM6XxZ1jzgbYVxTdDACLiNYpvgmULyyOSdkzNDktVNL/9GzCk0zLPlYb/XGF8kx7UtaeuB3aWtC3wIeCliLg3TfsqIODe1NT0WQBJX0/NLK9KugB4EVgNbFnHdtc6tqy9X93NWx7ehuJb9pLU1LOS4kP0ndX2odb6RcTqTnUcXhpfWhp+ja7ft650PncmSHqwtB+7sObcqXYMOtR6Xm0D7NWxnbStTwN/V5q/nn37d4qrrRslPSnpzCrztjTfrGtjEfEEcHRqZ/4EMD3dx6j0yPJnKf4hdtgaWEXxj3YJxVUFAJIGUtw0X2tzncYnAw8AR0fEK5JOBw5fh92pta49EhF/kXQN8BlgJ+Dy0rSlwIkAkvYDfiPptoj4N4pAfZOku4BPUjSZ1WIJRTNTh627mXdEaby83EKKK54hEbGq84JV9mFBN/V7FhgpaYNS+GxN0XzVW948dyRtA1wMHAjcFRFvSHqQIjSh+jGo10Lg1oj40Dqs400R8QrwJeBLknYBbpJ0X0TM6Y31r098xdPGJH1G0tD0gbEyFa8Glqe/25VmvxL4oqRtJW1C8YF6dfoQmw58VNI+Km74n82aD4KuvAN4GXhV0k4UPbt6S7W61mqApLeVXv0omqOOBT5GKXgkfUpSx4fdixQflKs7rzD5KkXniq+kkEfSeyVd1cX81wBfkDRC0mCg2rfka4DTJA2XNAg4o2NCRCwBbgR+IGlTFR0wtpf0f2rYh+dY+1wou4fim/5XVXS02J+iuaqr/VlXG6e6LU/1Po7iiqdDl8egB34F7CjpmLRvG0p6n6R317j8WsdN0kdUdOIQ8BLF/b6uzpOW5uBpb+OB+Sp6E/0YOCoi/pyayr4N3JGaGMYCUyg+bG8DngL+AvwTQETMT8NXUXzjfBVYRvENuytfBv6R4kb0xUC1nmD16rKudZhP0ezS8TouIu6g+KC4PyLKTV7vA+5Jx3EGcFpEPFlppRFxJ8UN+AOAJyWtoLi3MrOLelwMzAJ+B9xP0VGiKxdThMtDFFeTMymu9N5I0ydQdAB4lCJcprOm2a/aPpwNTEvnwlq97yLidYqgOZjixvv5wISIeLxKPXssIh6luC9zF8UH+98Dd5Rm6e4Y1LOtVyjuqR1FcWW3FPgesFGNqzibtY/baIrODq+m+p8fEbVe+bYUpZteZr0mXWWsBEZHxFPNrk9vknQT8LOIuKTZdemOpIOBCyJim25nblE+Bn2Tr3isV0j6qKS3S9qYojv1wxQ9klqGpPcBu9O7V2e9RtJASYeo+L8uw4GzKLpQtw0fg/WDg8d6y6EUzRHPUjQpHBUtdDktaRpFM8npqQmmLxJF9/gXKZqZHqPo6t5OfAzWA25qMzOzrHzFY2ZmWfn/8SRDhgyJUaNGNbsaZmbrlXnz5j0fEUPrWcbBk4waNYq5c+c2uxpmZusVSdWeplGRm9rMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg4eMzPLysFjZmZZOXjMzCwrB08vGD5yayT1+DV8ZLVfMzYzay1+ZE4veHbRQo688M4eL3/1Sfv0Ym3MzPo2X/GYmVlWDh4zM8vKwWNmZlk5eMzMLCsHj5mZZeXgMTOzrBw8ZmaWlYPHzMyycvCYmVlWDh4zM8vKwWNmZlk5eMzMLCsHj5mZZeXgMTOzrBw8ZmaWVcOCR9JISTdLelTSfEmnpfLNJc2W9ET6OziVS9J5khZIekjS7qV1TUzzPyFpYql8D0kPp2XOk6Rq2zAzs+Zr5BXPKuBLEbEzMBY4RdLOwJnAnIgYDcxJ4wAHA6PTaxIwGYoQAc4C9gL2BM4qBclk4MTScuNTeVfbMDOzJmtY8ETEkoi4Pw2/AjwGDAcOBaal2aYBh6XhQ4HLonA3MEjSlsCHgdkRsSIiXgRmA+PTtE0j4u6ICOCyTuuqtA0zM2uyLPd4JI0CdgPuAYZFxJI0aSkwLA0PBxaWFluUyqqVL6pQTpVtdK7XJElzJc1dvnx5/TtmZmZ1a3jwSNoE+AVwekS8XJ6WrlSikduvto2IuCgixkTEmKFDhzayGmZmljQ0eCRtSBE6V0TEtan4udRMRvq7LJUvBkaWFh+RyqqVj6hQXm0bZmbWZI3s1SbgUuCxiPhhadIMoKNn2kTg+lL5hNS7bSzwUmoumwWMkzQ4dSoYB8xK016WNDZta0KndVXahpmZNVn/Bq57X+AY4GFJD6ayrwPfBa6RdDzwDHBEmjYTOARYALwGHAcQESsknQPcl+b7VkSsSMMnA1OBgcAN6UWVbZiZWZM1LHgi4reAuph8YIX5Azili3VNAaZUKJ8L7FKh/IVK2zAzs+bzkwvMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg4eMzPLysFjZmZZOXjMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg4eMzPLysFjZmZZOXjMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg4eMzPLysFjZmZZOXjMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg4eMzPLysFjZmZZOXjMzCwrB4+ZmWXl4DEzs6wcPGZmlpWDx8zMsnLwmJlZVg0LHklTJC2T9Eip7GxJiyU9mF6HlKZ9TdICSb+X9OFS+fhUtkDSmaXybSXdk8qvljQglW+Uxhek6aMatY9mZla/Rl7xTAXGVyg/NyJ2Ta+ZAJJ2Bo4C3pOWOV9SP0n9gJ8CBwM7A0eneQG+l9a1A/AicHwqPx54MZWfm+YzM7M+omHBExG3AStqnP1Q4KqI+GtEPAUsAPZMrwUR8WREvA5cBRwqScABwPS0/DTgsNK6pqXh6cCBaX4zM+sDmnGP51RJD6WmuMGpbDiwsDTPolTWVfkWwMqIWNWpfK11pekvpfnfQtIkSXMlzV2+fPm675mZmXUrd/BMBrYHdgWWAD/IvP21RMRFETEmIsYMHTq0mVUxM2sbWYMnIp6LiDciYjVwMUVTGsBiYGRp1hGprKvyF4BBkvp3Kl9rXWn6Zml+MzPrA7IGj6QtS6MfBzp6vM0Ajko90rYFRgP3AvcBo1MPtgEUHRBmREQANwOHp+UnAteX1jUxDR8O3JTmNzOzPqB/97P0jKQrgf2BIZIWAWcB+0vaFQjgaeAkgIiYL+ka4FFgFXBKRLyR1nMqMAvoB0yJiPlpE2cAV0n6V+AB4NJUfilwuaQFFJ0bjmrUPpqZWf0aFjwRcXSF4ksrlHXM/23g2xXKZwIzK5Q/yZqmunL5X4BP1VVZMzPLxk8uMDOzrBw8ZmaWlYPHzMyycvCYmVlWDh4zM8vKwWNmZlk5eMzMLCsHj5mZZeXgMTOzrBw8fcEG/ZHU49fwkVs3ew/MzGrWsEfmWB1Wr+LIC+/s8eJXn7RPL1bGzKyxfMVjZmZZOXjMzCyrmoJH0r61lJmZmXWn1iue/6ixzMzMrKqqnQsk7Q3sAwyV9M+lSZtS/DCbmZlZXbrr1TYA2CTN945S+cus+dlpMzOzmlUNnoi4FbhV0tSIeCZTnczMrIXV+v94NpJ0ETCqvExEHNCISpmZWeuqNXh+DlwAXAK80bjqmJlZq6s1eFZFxOSG1sTMzNpCrd2pfynpZElbStq849XQmpmZWUuq9YpnYvr7lVJZANv1bnXMzKzV1RQ8EbFtoytiZmbtoabgkTShUnlEXNa71TEzs1ZXa1Pb+0rDbwMOBO4HHDxmZlaXWpva/qk8LmkQcFVDamRmZi2tpz+L8CfA933MzKxutd7j+SVFLzYoHg76buCaRlXKzMxaV633eL5fGl4FPBMRixpQHzMza3E1NbWlh4U+TvGE6sHA642slJmZta5af4H0COBe4FPAEcA9kvyzCGZmVrdam9q+AbwvIpYBSBoK/AaY3qiKmZlZa6q1V9sGHaGTvFDHsmZmZm+q9Yrn15JmAVem8SOBmY2pkpmZtbKqwSNpB2BYRHxF0ieA/dKku4ArGl05MzNrPd1d8fwI+BpARFwLXAsg6e/TtI82tHZmZtZyurtPMywiHu5cmMpGVVtQ0hRJyyQ9UirbXNJsSU+kv4NTuSSdJ2mBpIck7V5aZmKa/wlJE0vle0h6OC1zniRV24aZmfUN3QXPoCrTBnaz7FRgfKeyM4E5ETEamJPGAQ4GRqfXJGAyFCECnAXsBewJnFUKksnAiaXlxnezDTMz6wO6C565kk7sXCjpBGBetQUj4jZgRafiQ4FpaXgacFip/LIo3A0MkrQl8GFgdkSsiIgXgdnA+DRt04i4OyKC4inZh3WzDTMz6wO6u8dzOnCdpE+zJmjGAAOAj/dge8MiYkkaXgoMS8PDgYWl+RalsmrliyqUV9uGmZn1AVWDJyKeA/aR9EFgl1T8PxFx07puOCJCUnQ/Z+O2IWkSRdMeW2+9dSOrYmZmSa2/x3MzcHMvbO85SVtGxJLUXNbxn1IXAyNL841IZYuB/TuV35LKR1SYv9o23iIiLgIuAhgzZkxDQ9DMzAq5nz4wA+jomTYRuL5UPiH1bhsLvJSay2YB4yQNTp0KxgGz0rSXJY1NvdkmdFpXpW2YmVkfUOuTC+om6UqKq5UhkhZR9E77LnCNpOOBZygeOArFUxAOARYArwHHAUTECknnAPel+b4VER0dFk6m6Dk3ELghvaiyDTMz6wMaFjwRcXQXkw6sMG8Ap3SxninAlArlc1lz36lc/kKlbZiZWd/gB32amVlWDh4zM8vKwWNmZlk5eMzMLCsHj5mZZeXgMTOzrBw8ZmaWlYPHzMyycvCYmVlWDh4zM8vKwdMKNuiPpB6/ho/0T0KYWT4Ne1abZbR6FUdeeGePF7/6pH16sTJmZtX5isfMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zMwsKwePmZll5eAxM7OsHDxmZpaVg8fMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zMwsKwePmZll5eAxM7OsHDxmZpaVg8fMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zMwsKwePmZll5eAx2KA/knr8Gj5y62bvgZmtR/o3uwLWB6xexZEX3tnjxa8+aZ9erIyZtTpf8ZiZWVYOHjMzy6opwSPpaUkPS3pQ0txUtrmk2ZKeSH8Hp3JJOk/SAkkPSdq9tJ6Jaf4nJE0sle+R1r8gLav8e2lmZpU084rngxGxa0SMSeNnAnMiYjQwJ40DHAyMTq9JwGQoggo4C9gL2BM4qyOs0jwnlpYb3/jdMTOzWvSlprZDgWlpeBpwWKn8sijcDQyStCXwYWB2RKyIiBeB2cD4NG3TiLg7IgK4rLQuMzNrsmYFTwA3SponaVIqGxYRS9LwUmBYGh4OLCwtuyiVVStfVKH8LSRNkjRX0tzly5evy/6YmVmNmtWder+IWCzpncBsSY+XJ0ZESIpGVyIiLgIuAhgzZkzDt2dmZk264omIxenvMuA6ins0z6VmMtLfZWn2xcDI0uIjUlm18hEVys3MrA/IHjySNpb0jo5hYBzwCDAD6OiZNhG4Pg3PACak3m1jgZdSk9wsYJykwalTwThgVpr2sqSxqTfbhNK6zMysyZrR1DYMuC71cO4P/Cwifi3pPuAaSccDzwBHpPlnAocAC4DXgOMAImKFpHOA+9J834qIFWn4ZGAqMBC4Ib3MzKwPyB48EfEk8N4K5S8AB1YoD+CULtY1BZhSoXwusMs6V9bMzHpdX+pObWZmbcDBY2ZmWTl4zMwsKwePrTv/no+Z1cG/x2Przr/nY2Z18BWPmZll5eAxM7OsHDxmZpaVg8fMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zMwsKwePNZ+ffGDWVvzkAms+P/nArK34isfMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zMwsKwePrf/cHdtsveLu1Lb+c3dss/WKr3jMzCwrB4+ZmWXl4DEzs6wcPGbunGCWlTsXmLlzgllWvuIxM7OsHDxmZpaVg8dsXfkekVldfI/HbF35HpFZXXzFY2ZmWTl4zJrNTXXWZtzUZtZsbqqzNuMrHrP1na+YbD3jKx6z9d26XjF9/gNI6vHyW40YyeKFf+zx8tZ+HDxm7c7BZZk5eMxs3Ti4rE4tGzySxgM/BvoBl0TEd5tcJTOrxMHVdloyeCT1A34KfAhYBNwnaUZEPNrcmplZr2tycPXbcCPe+Ntfm7Z8b6wjd/i2ZPAAewILIuJJAElXAYcCDh4zW1svdGdv5vK9VYecFBFZN5iDpMOB8RFxQho/BtgrIk7tNN8kYFIafRfw+zo2MwR4vhequ77y/nv/vf/tq7z/20TE0HoWbtUrnppExEXART1ZVtLciBjTy1Vab3j/vf/ef+9/T5dv1f9AuhgYWRofkcrMzKzJWjV47gNGS9pW0gDgKGBGk+tkZma0aFNbRKySdCowi6I79ZSImN/Lm+lRE10L8f63N+9/e1un/W/JzgVmZtZ3tWpTm5mZ9VEOHjMzy8rBUydJ4yX9XtICSWc2uz6NJmmkpJslPSppvqTTUvnmkmZLeiL9HdzsujaSpH6SHpD0qzS+raR70nlwderE0rIkDZI0XdLjkh6TtHc7nQOSvpjO/0ckXSnpba18DkiaImmZpEdKZRXfbxXOS8fhIUm7d7d+B08dSo/iORjYGTha0s7NrVXDrQK+FBE7A2OBU9I+nwnMiYjRwJw03spOAx4rjX8PODcidgBeBI5vSq3y+THw64jYCXgvxbFoi3NA0nDgC8CYiNiFosPSUbT2OTAVGN+prKv3+2BgdHpNAiZ3t3IHT33efBRPRLwOdDyKp2VFxJKIuD8Nv0LxgTOcYr+npdmmAYc1p4aNJ2kE8A/AJWlcwAHA9DRLq+//ZsAHgEsBIuL1iFhJG50DFD2AB0rqD7wdWEILnwMRcRuwolNxV+/3ocBlUbgbGCRpy2rrd/DUZziwsDS+KJW1BUmjgN2Ae4BhEbEkTVoKDGtStXL4EfBVYHUa3wJYGRGr0nirnwfbAsuB/0zNjZdI2pg2OQciYjHwfeCPFIHzEjCP9joHoOv3u+7PRQeP1UTSJsAvgNMj4uXytCj65Ldkv3xJHwGWRcS8ZtelifoDuwOTI2I34E90alZr8XNgMMW3+m2BrYCNeWszVFtZ1/fbwVOftnwUj6QNKULnioi4NhU/13E5nf4ua1b9Gmxf4GOSnqZoWj2A4n7HoNTsAq1/HiwCFkXEPWl8OkUQtcs5cBDwVEQsj4i/AddSnBftdA5A1+933Z+LDp76tN2jeNL9jEuBxyLih6VJM4CJaXgicH3uuuUQEV+LiBERMYri/b4pIj4N3AwcnmZr2f0HiIilwEJJ70pFB1L8xEhbnAMUTWxjJb09/Xvo2P+2OQeSrt7vGcCE1LttLPBSqUmuIj+5oE6SDqFo8+94FM+3m1ylhpK0H3A78DBr7nF8neI+zzXA1sAzwBER0flmZEuRtD/w5Yj4iKTtKK6ANgceAD4TEev2a159mKRdKTpXDACeBI6j+OLaFueApG8CR1L08nwAOIHiPkZLngOSrgT2p/j5g+eAs4D/psL7ncL4JxTNj68Bx0XE3Krrd/CYmVlObmozM7OsHDxmZpaVg8fMzLJy8JiZWVYOHjMzy8rBY2ZmWTl4zLoh6QvppwCuaHZduiLpWElbdTPPLZLG5KqTWVf6dz+LWds7GTgoIhZ1FEjqX3pAZF9wLPAI8GyT62HWLV/xmFUh6QJgO+AGSS9JulzSHcDlkkZJul3S/em1T1pmA0nnpx9Nmy1ppqTD07SnJX1H0oOS5kraXdIsSX+Q9LnSdr8i6b70w1rfTGWj0pXXxelHyW6UNDCtewxwRVrvwBr2a5yku1K9f54eAttRv2+m8ocl7dTrB9XanoPHrIqI+BzFVcQHgXMpfgDwoIg4muIhiR+KiN0pHqdyXlrsE8CoNO8xwN6dVvvHiNiV4lFEUyme9zUW6AiYcRQ/qrUnsCuwh6QPpGVHAz+NiPcAK4FPRsR0YC7w6YjYNSL+XG2fJA0B/iXtx+5p2X8uzfJ8Kp8MfLmGw2RWFze1mdVnRumDfUPgJ+k5Zm8AO6by/YCfR8RqYKmkmzuvI/19GNgk/cDeK5L+KmkQMC69HkjzbUIROH+keEryg6l8HkXA1WssRSjeUTxmiwHAXaXpHU8gn0cRoma9ysFjVp8/lYa/SPEAxfdStB78pcZ1dDxIcnVpuGO8PyDgOxFxYXmh9EN85fnfALptVqtAwOx01Vatfm/gzwhrADe1mfXcZsCSdGVzDMUTywHuAD6Z7vUMo3jKbz1mAZ8t3XcZLumd3SzzCvCOGtd/N7CvpB3S+jeWtGM3y5j1GgePWc+dD0yU9DtgJ9ZcDf2C4sfTHgX+C7if4ueSaxIRNwI/A+6S9DDFD691FypTgQtq6VwQEcspesFdKekhimY2dyKwbPyzCGYNIGmTiHhV0hbAvcC+6QfVzNqe22/NGuNXqaPAAOAch47ZGr7iMWsxkq4Dtu1UfEZEzGpGfcw6c/CYmVlW7lxgZmZZOXjMzCwrB4+ZmWXl4DEzs6z+PwJxqravfloxAAAAAElFTkSuQmCC\n",
            "text/plain": [
              "\u003cFigure size 432x288 with 1 Axes\u003e"
            ]
          },
          "metadata": {
            "needs_background": "light",
            "tags": []
          },
          "output_type": "display_data"
        }
      ],
      "source": [
        "_ = sns.histplot(full_fragment_df, x='fragment_len', bins=range(1, 100, 5))\n",
        "_ = plt.title('Histogram of Lys-C digestion fragments')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tjOMFB1UZIkZ",
        "outputId": "82bea481-7ac4-40db-e1c6-bbf543ede283"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "20380"
            ]
          },
          "execution_count": 6,
          "metadata": {
            "tags": []
          },
          "output_type": "execute_result"
        }
      ],
      "source": [
        "num_proteins_total = full_fragment_df['protein_num'].nunique()\n",
        "num_proteins_total"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Tk09FhWFj-jR"
      },
      "source": [
        "# Generate binder sets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "N7vKutkIZMds",
        "outputId": "45aed489-e203-437a-9ed6-3622a526ad58"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "400"
            ]
          },
          "execution_count": 7,
          "metadata": {
            "tags": []
          },
          "output_type": "execute_result"
        }
      ],
      "source": [
        "amino_acids = list('ACDEFGHIKLMNPQRSTVWY')\n",
        "dipeptides = [x + y for x in amino_acids for y in amino_acids]\n",
        "len(dipeptides)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "yI9wsfCsbFAS"
      },
      "outputs": [],
      "source": [
        "BINDER_SEP = '-'\n",
        "TARGET_SEP = ':'\n",
        "\n",
        "def generate_binder_set(num_dipeptides, num_binder):\n",
        "  binder_list = []\n",
        "  for _ in range(num_binder):\n",
        "    # Each binder will bind to a random selection of dipeptide targets.\n",
        "    targets = np.random.choice(dipeptides, size=num_dipeptides, replace=False)\n",
        "    binder = TARGET_SEP.join(targets)\n",
        "    binder_list.append(binder)\n",
        "  binder_set = BINDER_SEP.join(binder_list)\n",
        "  return binder_set"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JffUVCqgpLy-",
        "outputId": "61a06a9d-08c8-49f4-8901-9ae3e4ece4e3"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "WW:KS-FI:AF-SW:AT\n"
          ]
        }
      ],
      "source": [
        "np.random.seed(12345) # Set random seed for deterministic behavior\n",
        "binder_set = generate_binder_set(num_dipeptides=2, num_binder=3)\n",
        "print(binder_set)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DfL8gaqTceda"
      },
      "source": [
        "A binder set is chosen randomly and is represented by a string.\n",
        "\n",
        "The character - separates each binder.\n",
        "\n",
        "The character : separates the dipeptide targets that a binder binds to.\n",
        "\n",
        "So, if the binder set is WW:KS_FI:AF_SW:AT then there are three binders. The first binds to WW and KS. The second binds to FI and AF. The third binds to SW and AT."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "C9AV3BehYr2u"
      },
      "source": [
        "# Read a fragment as barcodes using a binder set "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "mo4_hu35z0Ou",
        "outputId": "b1e9f8f5-468c-4e40-cd82-ca20dc650dc7"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\u003cdiv\u003e\n",
              "\u003cstyle scoped\u003e\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "\u003c/style\u003e\n",
              "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n",
              "  \u003cthead\u003e\n",
              "    \u003ctr style=\"text-align: right;\"\u003e\n",
              "      \u003cth\u003e\u003c/th\u003e\n",
              "      \u003cth\u003eprotein_num\u003c/th\u003e\n",
              "      \u003cth\u003efragment_num\u003c/th\u003e\n",
              "      \u003cth\u003eraw_fragment\u003c/th\u003e\n",
              "      \u003cth\u003efragment_len\u003c/th\u003e\n",
              "      \u003cth\u003epadded_fragment\u003c/th\u003e\n",
              "    \u003c/tr\u003e\n",
              "  \u003c/thead\u003e\n",
              "  \u003ctbody\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e0\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003eMWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTL...\u003c/td\u003e\n",
              "      \u003ctd\u003e81\u003c/td\u003e\n",
              "      \u003ctd\u003eMWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTL...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e1\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e1\u003c/td\u003e\n",
              "      \u003ctd\u003eEITK\u003c/td\u003e\n",
              "      \u003ctd\u003e4\u003c/td\u003e\n",
              "      \u003ctd\u003eEITKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e2\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "      \u003ctd\u003eHWEWLENNLLQTLSIFDSEEDITTFVK\u003c/td\u003e\n",
              "      \u003ctd\u003e27\u003c/td\u003e\n",
              "      \u003ctd\u003eHWEWLENNLLQTLSIFDSEEDITTFVKZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e3\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e3\u003c/td\u003e\n",
              "      \u003ctd\u003eGK\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "      \u003ctd\u003eGKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e4\u003c/th\u003e\n",
              "      \u003ctd\u003e0\u003c/td\u003e\n",
              "      \u003ctd\u003e4\u003c/td\u003e\n",
              "      \u003ctd\u003eIHGIIAEENK\u003c/td\u003e\n",
              "      \u003ctd\u003e10\u003c/td\u003e\n",
              "      \u003ctd\u003eIHGIIAEENKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e...\u003c/th\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "      \u003ctd\u003e...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670057\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e65\u003c/td\u003e\n",
              "      \u003ctd\u003eNEEDK\u003c/td\u003e\n",
              "      \u003ctd\u003e5\u003c/td\u003e\n",
              "      \u003ctd\u003eNEEDKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670058\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e66\u003c/td\u003e\n",
              "      \u003ctd\u003eLK\u003c/td\u003e\n",
              "      \u003ctd\u003e2\u003c/td\u003e\n",
              "      \u003ctd\u003eLKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670059\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e67\u003c/td\u003e\n",
              "      \u003ctd\u003eDWEGGLDEQRLSADSGYIIPLPDIDPVPEEEDLGK\u003c/td\u003e\n",
              "      \u003ctd\u003e35\u003c/td\u003e\n",
              "      \u003ctd\u003eDWEGGLDEQRLSADSGYIIPLPDIDPVPEEEDLGKZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670060\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e68\u003c/td\u003e\n",
              "      \u003ctd\u003eRNRHSSQTSEESAIETGSSSSTFIK\u003c/td\u003e\n",
              "      \u003ctd\u003e25\u003c/td\u003e\n",
              "      \u003ctd\u003eRNRHSSQTSEESAIETGSSSSTFIKZZZZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "    \u003ctr\u003e\n",
              "      \u003cth\u003e670061\u003c/th\u003e\n",
              "      \u003ctd\u003e20379\u003c/td\u003e\n",
              "      \u003ctd\u003e69\u003c/td\u003e\n",
              "      \u003ctd\u003eREDETIEDIDMMDDIGIDSSDLVEDSFL\u003c/td\u003e\n",
              "      \u003ctd\u003e28\u003c/td\u003e\n",
              "      \u003ctd\u003eREDETIEDIDMMDDIGIDSSDLVEDSFLZZZZZZZZZZZZZZZZZZ...\u003c/td\u003e\n",
              "    \u003c/tr\u003e\n",
              "  \u003c/tbody\u003e\n",
              "\u003c/table\u003e\n",
              "\u003cp\u003e670062 rows × 5 columns\u003c/p\u003e\n",
              "\u003c/div\u003e"
            ],
            "text/plain": [
              "        protein_num  ...                                    padded_fragment\n",
              "0                 0  ...  MWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTL...\n",
              "1                 0  ...  EITKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\n",
              "2                 0  ...  HWEWLENNLLQTLSIFDSEEDITTFVKZZZZZZZZZZZZZZZZZZZ...\n",
              "3                 0  ...  GKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\n",
              "4                 0  ...  IHGIIAEENKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\n",
              "...             ...  ...                                                ...\n",
              "670057        20379  ...  NEEDKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\n",
              "670058        20379  ...  LKZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ...\n",
              "670059        20379  ...  DWEGGLDEQRLSADSGYIIPLPDIDPVPEEEDLGKZZZZZZZZZZZ...\n",
              "670060        20379  ...  RNRHSSQTSEESAIETGSSSSTFIKZZZZZZZZZZZZZZZZZZZZZ...\n",
              "670061        20379  ...  REDETIEDIDMMDDIGIDSSDLVEDSFLZZZZZZZZZZZZZZZZZZ...\n",
              "\n",
              "[670062 rows x 5 columns]"
            ]
          },
          "execution_count": 10,
          "metadata": {
            "tags": []
          },
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Make fragments padded to constant length using non-amino acid overflow character\n",
        "READ_OVERFLOW = 'Z'\n",
        "max_len = full_fragment_df['fragment_len'].max()\n",
        "full_fragment_df['padded_fragment'] = full_fragment_df['raw_fragment'].str.pad(max_len, side='right', fillchar=READ_OVERFLOW)\n",
        "full_fragment_df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "6TF4ngBbTDcW"
      },
      "outputs": [],
      "source": [
        "NO_BARCODE = '_'\n",
        "\n",
        "def get_barcode_dict(binder_set):\n",
        "  '''Convert a binder set into a dict that gives the binder for a target.'''\n",
        "  barcode_dict = collections.defaultdict(lambda: NO_BARCODE)\n",
        "  for i, binder in enumerate(binder_set.split(BINDER_SEP)):\n",
        "    for target in binder.split(TARGET_SEP):\n",
        "      barcode_dict[target] = str(i)\n",
        "      # More than one binder could bind to the same target.\n",
        "      # To get a lower bound on the number of proteins identified map the target\n",
        "      # to just one of the binders.\n",
        "  return barcode_dict"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pPgLFka-phyA",
        "outputId": "cfd6afcd-c605-4a54-bcdb-09525a6c6e56"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "defaultdict(\u003cfunction get_barcode_dict.\u003clocals\u003e.\u003clambda\u003e at 0x7f00b2a49320\u003e, {'WW': '0', 'KS': '0', 'FI': '1', 'AF': '1', 'SW': '2', 'AT': '2'})\n"
          ]
        }
      ],
      "source": [
        "barcode_dict = get_barcode_dict(binder_set)\n",
        "print(barcode_dict)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KUOySzO_eYli"
      },
      "outputs": [],
      "source": [
        "DIPEPTIDE_LEN = 2\n",
        "\n",
        "def get_barcode_read(fragment, read_length, binder_set):\n",
        "  barcode_dict = get_barcode_dict(binder_set)\n",
        "  return ''.join(barcode_dict[fragment[i:i+DIPEPTIDE_LEN]] for i in range(read_length))\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eQkKZ3dzprsa",
        "outputId": "453c3d82-1d9f-4a4e-eebc-80231efb6146"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "_20_1____1__\n"
          ]
        }
      ],
      "source": [
        "binder_set = 'WW:KS-FI:AF-SW:AT'\n",
        "fragment = 'RSWWAFDDDAFDDDDD'  # Made up for illustrative purposes.\n",
        "barcode_read = get_barcode_read(fragment, 12, binder_set)\n",
        "print(barcode_read)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "nen1KtBXgipU"
      },
      "source": [
        "In this example the the fragment 'RSWWAFDDDAFDDDDD' is read using the binder set 'WW:KS-FI:AF-SW:AT'. There are 12 binding cycles each ending with a single amino acid removed by edman degradation.\n",
        "\n",
        "```\n",
        "\n",
        "example_fragment = 'RSWWAFDDDAFDDDDD'\n",
        "example_binder_set = 'WW:KS-FI:AF-SW:AT'\n",
        "binder_dict = {\n",
        "    'AF': '1',\n",
        "    'AT': '2',\n",
        "    'FI': '1',\n",
        "    'KS': '0',\n",
        "    'SW': '2',\n",
        "    'WW': '0'\n",
        "    }\n",
        "\n",
        "# read result is '_20_1____1__'\n",
        "```\n",
        "\n",
        "On the first cycle, the end dipeptide is RS. There is no binder in the set for that dipeptide target so no barcode is left for that cycle.\n",
        "\n",
        "On the next cycle, the end dipeptide is SW (the R has been removed). Binder #2 targets that dipeptide and so the bacode '2' is left. The barcode sequence would indicate both binder and cycle number.\n",
        "\n",
        "On the next cycle, the end dipeptide is WW (the S has been removed). Binder #0 targets that dipeptide and so the bacode '0' is left.\n",
        "\n",
        "The process continues until the last cycle number. The barcodes read would be: Binder #2 on cycle 2, Binder #0 on cycle 3, Binder #1 on cycle 5, Binder #1 on cycle 10. This is represented here as the string '_20_1____1__'.\n",
        "\n",
        "A fragement is matched to a specific protein if the barcode read is unique i.e. no other protein fragment gets the same barcode sequence read.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4i7k_sTDkc-d"
      },
      "source": [
        "# Fraction of proteome identified by different binder sets"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_0YPNcxNgEH3"
      },
      "outputs": [],
      "source": [
        "READ_LENGTH = 12\n",
        "\n",
        "def try_binder_set(binder_set):\n",
        "  full_fragment_df['temp_read'] = full_fragment_df['padded_fragment'].apply(lambda x: get_barcode_read(x, READ_LENGTH, binder_set))\n",
        "  # Handle the case where a read appears multiple times in the same protein.\n",
        "  temp_fragment_df = full_fragment_df.drop_duplicates(\n",
        "      subset=['temp_read', 'protein_num'], keep='first')\n",
        "  num_identified_proteins = temp_fragment_df.drop_duplicates(subset='temp_read', keep=False)['protein_num'].nunique()\n",
        "  return num_identified_proteins"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "26idT-YmlW2L",
        "outputId": "cf08e475-4124-42e2-d2ea-f2f425fdc7e6"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "binder_set =  WW:KS-FI:AF-SW:AT\n",
            "num_identified_proteins =  124\n",
            "Proteome identified: 0.6%\n"
          ]
        }
      ],
      "source": [
        "# Look at the performance for a binder set.\n",
        "binder_set = 'WW:KS-FI:AF-SW:AT'\n",
        "print('binder_set = ', binder_set)\n",
        "num_identified_proteins = try_binder_set(binder_set)\n",
        "print('num_identified_proteins = ', num_identified_proteins)\n",
        "print(\"Proteome identified: {:.1%}\".format(1. * num_identified_proteins / num_proteins_total))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "K-OhIA2cnklg",
        "outputId": "aaed3739-254d-483f-8bb2-51238736f4d1"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "num_dipeptides =  8\n",
            "num_binder =  10\n",
            "binder_set =  WW:KS:YT:MQ:GM:SC:PM:YC-FI:AF:VA:QR:RA:PV:KM:TW-SW:AT:VL:VC:WR:AC:KD:NH-GQ:QA:HD:SK:SV:TH:MW:QR-VQ:VK:IH:CT:QR:IR:TS:DG-LR:EG:FW:QW:EC:CQ:NE:PA-DV:GE:AD:IT:ET:TP:YN:YF-NF:HD:DG:TM:AL:CY:VE:QY-LM:DV:AR:KT:MQ:EK:NR:CG-FE:VF:DE:EF:QK:ME:AQ:RC\n",
            "num_identified_proteins =  17553\n",
            "Proteome identified: 86.1%\n"
          ]
        }
      ],
      "source": [
        "# Look at the performance for a larger binder set with more targets each.\n",
        "num_dipeptides=8\n",
        "print('num_dipeptides = ', num_dipeptides)\n",
        "num_binder=10\n",
        "print('num_binder = ', num_binder)\n",
        "np.random.seed(12345) # Set random seed for deterministic behavior\n",
        "binder_set = generate_binder_set(num_dipeptides=num_dipeptides, num_binder=num_binder)\n",
        "print('binder_set = ', binder_set)\n",
        "num_identified_proteins = try_binder_set(binder_set)\n",
        "print('num_identified_proteins = ', num_identified_proteins)\n",
        "print(\"Proteome identified: {:.1%}\".format(1. * num_identified_proteins / num_proteins_total))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aZANfLMYrrMk"
      },
      "source": [
        "# Evaluate proteome identification for binder sets with a range of properties"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "esGjnIvwNC1P"
      },
      "outputs": [],
      "source": [
        "results = []\n",
        "np.random.seed(12345) # Set random seed for deterministic behavior"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Z5e4LksVr9cQ"
      },
      "outputs": [],
      "source": [
        "# Small number of properties to evaluate (for faster run time).\n",
        "RANGE_NUM_BINDERS_IN_SET = [1, 5, 10, 15]\n",
        "RANGE_NUM_TARGETS_PER_BINDER = [1, 4, 8, 350]\n",
        "NUM_SAMPLES_PER_CONDITION = 1\n",
        "\n",
        "# Values used to generate the results in the paper.\n",
        "# RANGE_NUM_BINDERS_IN_SET = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 25, 50, 75, 100]\n",
        "# RANGE_NUM_TARGETS_PER_BINDER = [1, 2, 3, 4, 5, 6, 7, 8, 9, 25, 50, 100, 150, 200, 250, 300, 400]\n",
        "# NUM_SAMPLES_PER_CONDITION = 20"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Rkr3fb2xog_e",
        "outputId": "6999965f-4667-428d-f4d6-552ddab80168"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "num_binder=1, num_dipeptides=1, sample=0, binder_set=WW, num_identified_proteins=6\n",
            "0.0%\n",
            "num_binder=5, num_dipeptides=1, sample=0, binder_set=FI-SW-GQ-VQ-LR, num_identified_proteins=433\n",
            "2.1%\n",
            "num_binder=10, num_dipeptides=1, sample=0, binder_set=DV-NF-LM-FE-RN-IQ-TH-FK-CA-LQ, num_identified_proteins=1830\n",
            "9.0%\n",
            "num_binder=15, num_dipeptides=1, sample=0, binder_set=MG-PP-WN-ES-PN-CI-DL-PK-CH-IY-AH-GG-GK-IC-MS, num_identified_proteins=4019\n",
            "19.7%\n",
            "num_binder=1, num_dipeptides=4, sample=0, binder_set=TI:IH:ME:MN, num_identified_proteins=33\n",
            "0.2%\n",
            "num_binder=5, num_dipeptides=4, sample=0, binder_set=VT:LW:HP:AA-GP:ND:MP:LC-ES:HI:QK:SI-VS:HS:FY:DH-SR:CN:EY:IN, num_identified_proteins=3957\n",
            "19.4%\n",
            "num_binder=10, num_dipeptides=4, sample=0, binder_set=SE:YR:KE:LP-VR:IY:IE:CN-WM:IH:TE:EN-TD:WI:PW:YL-KR:HP:YT:CL-TY:CL:LG:LL-HQ:ND:YK:FV-TM:WL:AE:FE-TG:RA:IR:CA-YS:IA:YN:NT, num_identified_proteins=11668\n",
            "57.3%\n",
            "num_binder=15, num_dipeptides=4, sample=0, binder_set=KD:AN:VG:VR-TR:RP:WS:SI-YN:TQ:LG:IA-RI:NW:NT:WW-IM:FK:GP:PH-HH:RM:WY:VF-VK:AI:SQ:QN-IW:IN:QC:IH-CG:VH:VF:QH-SC:AA:DW:LE-QT:AS:MC:HL-IL:GW:DA:AP-FW:DL:DV:DI-SE:RT:RD:FV-TW:DV:WM:SE, num_identified_proteins=16485\n",
            "80.9%\n",
            "num_binder=1, num_dipeptides=8, sample=0, binder_set=FR:HL:PQ:NM:AE:KV:HF:KS, num_identified_proteins=65\n",
            "0.3%\n",
            "num_binder=5, num_dipeptides=8, sample=0, binder_set=FV:WG:DK:SG:GL:NP:QK:AT-IV:CH:EI:GV:IW:KT:WW:LN-AS:NQ:KN:YC:AP:KY:QE:WI-QT:GH:YY:SE:AF:MR:SR:KK-RM:KE:AH:VL:NA:GN:YW:PN, num_identified_proteins=8265\n",
            "40.6%\n",
            "num_binder=10, num_dipeptides=8, sample=0, binder_set=NG:HH:IS:GS:SN:AD:KL:HG-IQ:LY:TI:PI:NR:KC:FQ:FP-GH:SV:HS:DE:ES:VN:WI:NM-TG:DT:KM:RL:FP:YG:VC:HD-EI:VP:GL:ES:VL:HY:TV:IT-MH:WS:VC:TT:QE:RA:EN:CN-PV:QY:SW:DN:IN:KF:AG:TY-EY:DH:AS:WA:VQ:PS:MG:PW-CH:VS:TW:TK:NR:IA:QL:LR-AD:SL:GT:SA:MG:ME:PI:SG, num_identified_proteins=18283\n",
            "89.7%\n",
            "num_binder=15, num_dipeptides=8, sample=0, binder_set=CS:VQ:AF:WS:QA:TY:QC:VH-GP:TG:HT:YC:QE:FP:RA:RQ-LE:MK:QA:KP:QD:AH:IC:TQ-MR:PH:MQ:MG:QW:LL:YW:IH-YH:MR:PD:MV:FR:CI:VA:NE-KL:LY:TR:SG:RV:VI:RM:MP-SC:QC:DQ:IH:EN:AK:SD:HK-TQ:LA:AV:IT:TG:RM:KS:GF-PL:GI:TY:GF:RM:PT:CR:CY-QL:EW:WW:ST:FP:AE:FQ:VR-CV:AN:IT:SC:LS:IH:RA:QL-NW:SI:DV:ME:YI:NP:AG:MP-QH:QV:TN:HN:QD:GC:MK:YI-AP:WC:SS:TT:PD:AA:QD:HR-DA:WA:WS:NW:CP:IV:HK:DC, num_identified_proteins=19414\n",
            "95.3%\n",
            "num_binder=1, num_dipeptides=350, sample=0, binder_set=AQ:LR:AH:YT:FD:RT:ND:IW:CQ:FQ:RR:VC:LI:TL:MV:PP:TT:TR:CA:EP:WV:II:KF:HH:GY:EK:YN:YP:AF:LG:GL:EC:YY:QY:CR:NI:RN:LF:AW:VW:LN:GQ:WY:FL:HA:LL:MQ:IM:DP:DA:DE:LP:MA:IP:IF:WL:SE:MH:QQ:WN:VP:KK:PA:QA:IG:HQ:EY:HV:WC:AV:EV:LQ:LM:KA:ML:YW:SG:PD:RG:NS:TP:YV:CH:MP:WA:NR:AD:RY:GT:GP:GK:NC:FY:RH:AA:CN:QF:HI:LY:DQ:CE:DH:IY:VL:LS:EA:HY:GS:TG:HP:ID:RQ:TY:WM:PV:TQ:RD:SY:LT:IS:GM:CC:PQ:IL:RK:QC:LK:HF:QS:AI:WR:CT:GA:WD:AS:KE:CG:SQ:MY:HS:HG:RM:KY:LE:RC:GV:VK:ST:PM:IH:EE:YE:YG:GR:FC:PY:SA:KP:YS:NF:PW:FW:VM:DC:AL:NW:VF:FK:NV:PC:QK:FS:HD:ER:RI:ME:MS:YC:DK:AE:TD:IV:EW:HL:EM:TS:FM:VT:PR:MM:KL:WK:FN:DW:GI:RE:VD:YF:CM:WS:LD:PF:QD:HE:HM:NY:VA:LC:HC:KM:MT:TF:ED:KV:LH:SK:GW:YH:DY:AP:QI:EH:IT:EQ:TV:VE:SC:SD:TM:NP:YD:VG:EL:ES:HK:WQ:GD:RS:YQ:FG:NN:MN:NA:AC:GH:FT:YI:SF:RF:TI:MR:NH:GE:DI:AK:GC:AG:QL:DV:VI:QE:AN:SR:FA:KI:IC:IK:NK:VS:PE:KH:CV:AT:SW:WP:TE:MK:QT:EF:WF:PK:SI:KQ:NE:NM:GN:VR:DN:CF:CI:IQ:DR:MG:YM:QM:MC:CS:FE:AM:RA:QR:YA:WE:CY:LV:TK:HT:PS:DL:KS:VQ:YK:RL:WG:PI:CL:SV:SP:NQ:GF:KW:KT:CD:SL:QH:SM:FH:DS:VH:VN:DG:HW:CK:CP:NT:ET:KN:DF:WH:QV:DM:AY:TH:EG:TA:YR:KC:PG:WW:IN, num_identified_proteins=465\n",
            "2.3%\n",
            "num_binder=5, num_dipeptides=350, sample=0, binder_set=HV:PK:WW:QG:VM:HG:PL:WL:TM:SC:QI:MD:FC:WE:EG:EM:DN:CS:DL:RA:DW:VR:VW:MG:HQ:TP:KG:YL:QA:HT:MS:SF:PG:IL:DY:IP:RF:CM:TF:YY:TH:SN:QT:CY:IY:YQ:PI:MM:GP:KE:GI:SQ:CC:WD:RW:LL:PA:DR:FF:IC:GL:KL:PS:QY:LD:MQ:MI:LY:RM:SV:YM:GG:HL:FN:MT:SD:SP:AF:MP:DD:SE:SI:LF:WI:SA:KK:TR:AW:AN:VQ:QM:AV:YW:EL:CL:WR:PH:SW:PN:RN:QR:KD:ND:FD:YG:LH:AQ:RC:CW:WS:PW:TI:GE:FT:EE:NN:FR:AD:AI:QW:LR:MH:RG:FP:YV:ST:FM:YA:NR:YT:QQ:LP:NM:SR:DG:IE:LA:PP:PF:MY:FQ:DV:LQ:NW:EI:EN:EP:YE:GY:NS:LT:YI:FE:FA:FH:NL:WN:DI:HK:FW:NQ:GR:IR:GF:IQ:ET:NV:AE:YR:YN:LG:ID:EK:IM:RE:HR:TK:HN:GW:KC:II:PM:ML:KA:GM:TA:TE:GK:DM:YS:KV:SS:CD:RK:WC:RV:QP:EQ:LW:IN:RD:MR:PQ:MW:PD:RI:QL:QH:TQ:VG:YH:GS:LK:VF:YK:TY:HA:HW:FG:IF:CF:NC:VH:TV:VA:SL:KY:VT:SK:EW:DQ:GT:VV:YD:SY:HM:EF:YF:HH:GC:MA:WH:GH:GD:NK:CE:DC:QV:MN:AA:VS:FY:EV:EH:VI:DE:QD:VD:DP:NF:HI:IT:WY:EY:EA:HD:CH:KM:VK:KR:AG:QK:SM:PC:CN:IW:CQ:CT:VE:NY:LC:NA:VN:AH:RH:QN:QC:HS:NH:VY:IG:FL:IK:WQ:ED:WF:MF:LV:KI:VP:IH:TW:QS:ER:FS:IS:DH:NG:TG:VL:WT:SG:KF:PV:WG:DK:RR:CK:AS:MC:KT:RY:WP:RP:CA:GV:AC:RT:DA:TD:RL:RS:LE:KS:LI:WA:RQ:KW:GQ:GA:WV:LM:CR:AK:EC:NI:PY:TL:CI:HE-AM:DA:QW:IM:CT:PA:EL:PR:VS:RK:WW:EY:SQ:VA:QR:DE:WE:IK:IV:DH:VP:QP:FV:SW:SK:NF:DF:FK:GI:YS:TH:KC:LV:KF:DY:ID:IR:QG:PS:CI:LW:CC:II:RT:GV:ER:VH:SS:RG:AA:TL:LC:DP:FW:YH:RF:QY:LR:YG:WS:EM:PG:FS:GE:TQ:KD:HK:IE:HS:DM:CR:AP:KY:CD:QC:YW:RR:MD:QA:AE:MH:GY:VT:ML:TW:RH:CM:SF:MM:MV:TN:TY:MG:LE:MQ:KM:HN:VG:IT:EA:DS:FG:YC:NP:SD:RQ:HF:DQ:CK:TT:WD:WM:IC:HR:YR:EP:LT:SR:WT:KL:HE:KS:GG:SH:HM:CL:FT:SI:VQ:PM:EK:IH:SA:SY:WI:LS:EF:PI:TA:CE:KK:HP:NW:HY:EW:EC:KA:CQ:AT:HL:LL:QT:VI:HD:NE:DI:HC:MK:HQ:PW:WH:GD:RC:EQ:CS:AQ:DN:YL:IS:GF:MS:HG:AN:FA:MT:LD:MA:AD:TI:NL:ES:DG:CF:FI:MC:LH:MF:FY:SP:PP:QS:AF:NA:TD:WN:RA:ST:NI:MR:RY:MW:FH:WA:HH:FP:KN:DK:QM:LF:PN:RE:NV:WY:PL:LM:TK:DC:FN:AC:YF:QK:HI:QE:YY:DL:LQ:KQ:NQ:HA:FE:FL:GT:MP:FR:QI:YI:EG:WL:VY:PE:GP:AI:AV:CN:RV:AK:VD:WR:WG:KH:IA:GQ:RD:VK:TF:LP:KW:QL:CY:IG:YK:EI:FM:WF:YA:TV:NK:ED:DR:MY:IF:SC:IN:SE:SM:CW:GN:GC:FF:DW:YT:WQ:AW:TS:CG:RM:QD:IQ:IL:NH:ND:TG:LN:MN:RN:TM:ME:KI:GK:EV:SV:PF:WK:PY:RW:IW:LY:VF:GM:AR:IP:VL:AL:TC:VW:PV:QV:RS:EH:HV:GH:ET:TP:PC:PQ:LA:GA:EE:CP:GR:YQ:WV:VC:YE:CH:KE:NM:NC:NR:FC:HW:RP:KR:AG:KT:VM:YD:QN:PD:NG-FR:VE:LF:HE:LD:HF:VP:TH:CT:IH:IT:WP:MC:PK:FC:RG:IM:DS:KD:ER:IE:NP:EM:IS:RL:QH:QD:YL:AH:YP:MW:WA:II:VY:VR:KA:WG:CM:RW:HR:YN:IL:WK:YH:IV:CK:FW:QW:CR:DT:IW:NK:HT:LM:LA:QL:SY:NR:TV:PQ:AN:PY:LE:VM:LL:FH:EL:PI:AP:MQ:IR:TN:EV:RK:KT:MR:PM:LR:WM:QF:QM:TI:MG:HA:NH:MY:SA:TM:HH:RQ:HD:IP:YW:FN:TL:TE:DL:GQ:QK:EA:KV:RN:VN:TS:AM:WH:FY:CN:RH:AQ:NN:AT:NE:HK:LI:QA:RF:IC:CY:AS:PA:MH:VW:DN:VV:PP:LT:MS:KL:GG:EQ:DE:HM:RD:WW:AE:YV:CC:TR:FT:KY:WC:DH:GY:FA:LW:WF:KS:WV:KG:NS:TD:NW:ND:DD:FL:FF:YK:LP:FM:QS:NV:CI:IN:LC:NG:EI:YM:HQ:LQ:KW:GE:NC:QQ:KF:DF:FP:LH:VS:DA:GV:KM:LY:FG:EC:SP:IK:YI:TP:VH:GW:NA:PD:EK:YE:GA:MN:WT:KN:RR:SC:NQ:RV:RE:CE:WQ:VK:WR:CW:GN:RT:SR:PE:CD:WN:NL:MM:KI:AD:QG:SS:PV:IA:MV:EP:SW:ST:TK:GM:SE:DC:MI:PH:ET:AR:CA:LG:PF:SH:PT:WE:PS:YC:KR:YD:RI:AK:FQ:ME:QY:DM:IG:NT:GF:QP:TY:LS:EH:WL:CF:DK:SL:TQ:YR:GI:FD:HY:QE:SQ:SI:FV:NM:RY:CV:AW:HN:SK:VI:AL:PG:GK:MA:DR:FI:TF:HV:WD:DP:GT:EY:VD:CH:RP:IY:EE:QT:MK:QV:YT:DQ:QN:PR:AV:KC:TA:HL:NF:AA:YG:FK:VF:AC:GS:GR:LV:MP:AY:AG:RA:HS:YF:KK:SV:HG:IQ:WY:FE:DI:VQ:TT:ED:PN:SF:VA:KP:EF:YA:MD:VC:SG:HW:WS:GC:DY:RM:SD:KQ:TC:GP:EW-TP:KA:SF:VN:NC:AH:TR:LP:FA:LS:KG:GH:MT:DQ:QK:CT:SW:EK:IQ:CS:IL:VG:CQ:VL:EW:VF:RW:EV:LV:DP:QP:HM:NW:LM:GC:AY:YT:CI:CK:VS:KP:AS:HL:FW:AM:NH:NP:HD:LW:KR:AD:IN:PA:WG:WF:KH:PL:KF:EY:MP:RV:QE:NL:AT:AW:EP:MI:WP:ST:YG:PY:WT:FR:CR:DS:AK:WI:HF:GK:WY:NR:GV:YD:SC:WQ:QC:FK:MF:DD:CG:WS:IH:SM:NY:AL:GQ:PG:PE:WH:RE:AC:EA:MH:AP:VR:WV:IA:RA:FH:MQ:VC:GA:QM:RS:EG:RL:RY:YY:RF:FV:YH:VQ:HN:FC:RR:LF:DK:HH:NK:YP:QL:KY:WN:PV:KC:HP:MN:DM:FG:NT:GN:RQ:GT:VY:FM:CC:NF:RD:YC:YI:KN:NQ:LT:WR:CE:FS:RT:MM:KM:YR:HG:TF:SA:HE:EH:AF:SN:FN:IV:DR:QT:WC:PM:IP:LL:SP:SS:IW:QS:PS:IE:VT:TW:VV:SQ:TC:QW:DF:NV:YW:YQ:KT:EM:IF:IK:DC:EN:HT:QN:DT:PI:IG:QF:ID:IC:LE:AI:DY:GD:YM:KD:IT:LQ:DH:MA:CL:KE:PK:MS:HK:YV:PC:FI:SD:GW:YS:YF:HR:LY:NN:RG:TT:ET:QA:CN:GG:MD:HV:LA:HY:AV:WM:DN:CM:LR:QR:GF:MR:EE:VP:TE:QV:AG:RN:DA:ES:MC:HQ:KS:II:GE:EL:GR:KV:DI:WK:NG:HW:RH:SY:RC:MW:SG:AA:QH:PN:KK:RM:NS:CD:CW:RK:IS:PF:ML:AQ:QD:NI:QY:TL:GM:SK:LK:NE:QQ:DL:VM:GY:TK:HA:YA:GI:WL:PH:LD:TN:CY:DG:NM:SE:MV:GP:YL:AN:IM:LH:PQ:EC:EI:CF:ND:LG:ED:GS:RP:HI:WW:TQ:WD:KW:FT:EQ:EF:PP:MK:TA:SR:GL:DW:ER:TD:KI:HS:VK:AR:AE:TG:TV:FE:YK-PV:EK:EY:DD:PF:WN:YQ:RR:YY:WS:YL:RI:ND:EF:AY:GG:IH:SG:IT:MF:MV:WF:GP:FD:YE:EL:QW:HP:KV:LE:ID:AL:HD:IE:IV:AI:TW:CL:KT:EG:WL:MD:DM:AA:PQ:AV:VN:PD:NQ:NT:KW:IK:CT:QD:DK:DA:NY:GH:DH:YN:NS:CN:DC:HV:CP:IR:DY:DS:MK:DL:RC:ET:WP:WQ:TE:GY:AC:YG:IY:VR:TS:FQ:IW:QY:VV:HF:QT:DW:EW:ER:RL:GM:PL:LN:IS:CK:CD:TM:RH:TF:KM:SR:NN:FT:LL:KA:CQ:KY:WI:FS:RN:QV:VQ:RM:HA:FA:RP:SV:GF:FN:QP:QS:NA:LI:TK:NR:SP:VG:TG:QN:TH:FR:QC:NL:HW:CM:TV:QE:LR:VS:VH:QK:NE:HQ:MW:SF:FY:RD:TY:AM:CR:CV:PW:RE:IC:HS:LF:HE:RG:TP:MQ:VT:RF:EV:DE:CH:AS:GV:MM:VA:ML:SM:KF:MA:TI:LM:NW:IL:SH:MP:QL:PM:VC:YS:IM:TN:CW:QH:KQ:VM:NH:AF:DV:ME:NG:WG:FE:CA:VL:FK:ES:LD:SI:MG:WH:YP:SE:PG:WR:EN:WM:LT:IP:WE:FM:PE:YI:PR:GS:AG:SD:RS:LS:AN:LW:HN:AD:FP:FC:EC:TA:VF:HL:DP:YK:HM:MR:GI:FG:AW:TT:NI:FL:YW:GE:NK:MN:AP:NC:TC:IN:FH:FI:DF:GD:WV:EP:HR:YC:KN:LP:RV:SQ:DN:RT:SK:PK:CC:FV:QF:KH:DQ:QG:CI:YD:NP:QA:KE:GL:LG:SC:AK:YA:WT:SA:NF:VP:MI:WD:RK:SN:RW:ST:KK:LQ:GN:HK:MC:VE:KP:QQ:IG:IF:HI:HT:EA:IA:GK:KR:ED:NM:PS:PP:PN:DT:YH:EM:NV:FF:GQ:YM:KI:VK:FW:TD:TR:PI:GR:AH:YF:GC:HG:VY:KL:PT:DR:WC:II:MY:TQ:EQ:SL:RQ:IQ:SW:GW:LA:MS:CF:KG, num_identified_proteins=4722\n",
            "23.2%\n",
            "num_binder=10, num_dipeptides=350, sample=0, binder_set=QV:DG:FQ:CE:LT:YW:VK:QF:KR:MC:NG:GS:VW:AH:HR:FC:TV:AD:YA:YL:MH:HY:IP:ES:PF:YG:EH:SM:FF:QH:CI:FW:YY:WF:WS:IS:RA:RS:EF:RE:HA:CY:VA:LK:LM:NA:KS:AF:IF:IA:QK:ST:PE:EW:IT:PR:EG:CQ:EV:NV:RR:DN:VC:LY:IH:KE:GR:QG:TK:KD:CR:HE:IK:HT:TW:MA:ID:TH:DD:SR:LR:TM:NC:DK:YD:LW:KT:YE:KY:RF:LD:GN:NW:MV:QM:SC:WD:YP:RN:RK:YN:SN:MN:PD:II:GG:HK:YM:SK:AC:WQ:RV:HC:TR:QY:CG:CL:FP:RG:CM:CP:HP:GH:WH:RY:ER:AM:PQ:LI:SV:HM:NK:MQ:EL:GC:FG:KA:AA:FV:WT:SG:NL:RD:RH:KV:MS:IY:WC:RQ:CF:IM:SQ:QQ:DI:DT:SS:YQ:LL:VT:RP:LQ:FE:KF:GM:AR:FH:WP:HL:HW:WM:DY:YC:NN:PN:FR:LP:NS:ND:HQ:AK:FN:KL:TI:PS:AN:WV:AG:LN:KN:PP:FA:QP:VV:NI:LC:WN:MT:GL:PA:VD:KQ:PC:QA:TD:TG:VE:GK:CH:YT:YH:KM:WY:RW:QS:NE:TF:LE:IR:GV:CS:WG:TY:HN:KK:MY:IN:VP:EA:RC:GI:VS:PI:HD:DC:CD:FK:FY:VG:AL:CA:KW:PK:SI:VN:YS:QI:AQ:AI:WK:GE:SY:HI:GY:DH:YI:YK:SW:TE:DA:LH:QC:TN:LF:SL:RT:NQ:KP:DR:MP:TT:PM:NH:DW:VY:NT:SA:DE:TL:WR:GQ:EI:EE:KI:MR:AW:MF:QD:RL:AE:EC:QN:EN:GW:TP:SH:IV:EP:PY:VH:AY:EM:KG:YF:GT:ML:AP:TA:PW:ME:QE:SE:MG:SD:LG:TC:CN:DL:VL:VR:FL:RM:LA:DQ:QR:DF:DM:ED:YR:DV:IG:PT:HV:PL:CK:AS:CC:MD:QL:IW:MI:QW:EK:FD:GP:WI:WL:IC:GA:GF:IE-ED:NW:QS:NE:FW:GK:HA:AM:HN:HT:HL:FE:WG:YK:MA:DQ:CA:SF:ML:VR:MS:TY:TQ:AL:LF:CF:SS:SM:LV:AW:PV:EF:KR:MD:VH:KG:MY:QN:PW:VF:KK:DE:QA:WL:EM:IN:SY:GA:PK:IQ:PS:YR:AA:SN:LD:VS:SL:HR:KV:CH:WI:HP:WK:SH:TI:AV:WR:NI:CQ:GW:YT:QC:RN:II:TM:QR:DK:CG:MF:KP:AR:CS:AS:NA:EV:MN:RK:MC:QY:FN:SC:LW:LA:NC:VP:WH:FI:EG:FF:KH:DD:FM:RS:VN:RP:TE:NV:CM:HM:NH:QD:IT:FH:LQ:WQ:QM:ME:WM:AK:GH:LI:MH:HK:WD:HD:LC:IW:IA:NR:PM:TA:CD:TR:MP:PR:QT:WT:HH:TV:PL:NP:MM:VL:KF:GC:QK:KN:YP:SA:YW:DT:DF:CW:GD:YD:AT:IP:EN:CT:TG:YI:QI:DI:VW:KY:ST:AF:AG:CE:KT:FS:LE:TL:VI:EC:ID:DV:RH:RC:WC:YN:NS:FC:FT:ER:LH:AP:IC:MQ:DN:FL:WP:EI:NL:QG:HG:CC:TK:AE:GV:RQ:AN:RF:DM:RI:HV:KE:IF:PT:TD:PN:CN:MW:SW:CR:KC:WS:EQ:MT:LN:VV:TS:WA:RR:EW:QE:PP:LP:SK:FQ:HI:TW:LS:PC:GG:PQ:CP:KM:YG:RG:HQ:NY:GE:EL:HS:AD:EK:MV:HY:AC:QW:DA:SR:GS:CK:YY:VD:KS:VY:DL:TH:GF:LY:SQ:IL:IS:QQ:VE:TT:VA:HC:IG:MI:LL:SI:LT:YM:PY:QV:HE:GI:YV:FA:QH:PD:SD:NF:PH:DP:VQ:DW:RM:FD:KI:VG:CL:CY:LM:RD:KL:TC:IV:RY:MG:PF:IY:IK:RA:VK:DG:GM:RT:VT:WY:DC:NM:FR:QP:EP:RV:YS:ND:DS:HF:DR:PG:IH:YC:WW:WF:LR:YL:AY:FK:EH:WV:MK:SV:EE:IR:KA:PE:YE:EY:RW:FG:LK:GR:DH:IM-ET:ES:RW:YT:LF:QL:KN:VR:FR:NQ:HS:YC:IM:NP:DD:VL:LP:EA:AI:KH:VC:CL:RD:GF:YH:WQ:GR:TY:YD:VP:DV:FC:RH:AP:TA:SA:PP:PQ:WV:CD:AS:CP:AN:CM:HF:RV:NL:EE:IQ:KW:NR:IE:SV:QA:KA:YW:TD:KV:RE:TH:GT:AA:MT:HH:SF:GE:NT:DQ:MQ:TE:QY:VM:LV:HN:PL:HT:FI:EH:IN:LR:KF:ND:QR:WS:KQ:KL:PK:DH:GN:HE:TK:QE:MP:QM:QK:HG:FE:PG:WF:CQ:FM:WP:WA:QF:RL:MS:MA:YS:PT:YM:EY:HW:YA:SR:WR:WH:MW:HY:YR:DS:TR:CE:DG:YF:CA:RN:KK:PI:NG:SH:TV:IP:WI:AY:QS:ST:CH:RS:GM:TN:EC:FG:PE:NI:RG:NC:AH:YQ:RQ:WC:SK:LL:VD:SS:PC:LE:IY:AC:PH:GG:LA:EN:LY:CW:FW:CY:AK:WL:IW:DF:MI:CF:SG:WW:EG:PF:EF:RC:EQ:YY:HA:SW:GQ:YK:GK:VF:QD:VE:QI:RI:EM:IR:LI:VG:AF:WE:KP:LT:HV:DR:DE:PA:FN:FV:DM:DY:MM:VH:FA:KT:GD:KG:DC:KR:IK:FH:GY:WG:WD:RR:RM:PV:IL:NS:EP:AV:PY:TP:TW:RF:TI:HR:FY:PW:IV:ED:HM:GL:KM:DL:CK:IA:VS:LD:IC:CV:NM:YP:YV:NY:RP:AQ:LS:KD:PS:MH:GI:MF:KY:QG:PR:SL:QW:SM:SQ:IG:HK:VW:GH:KI:FS:RA:SN:MR:FP:KS:QH:DA:TF:PN:VN:TC:FQ:HD:IH:HC:DN:AL:CR:LW:RY:EV:VT:VK:MG:CN:HP:TL:EK:KE:DP:CG:LH:NW:ML:MC:LG:AD:WN:AE:GS:QQ:NH:VY:SE:GW:ER:TG:NA:QP:DI:FD:TM:MK:PD:YI:AT:FF:DK:KC:NF:SI:MY:GA:NN:SD:QC:NV:II:QV:RK:FT:LC:EL:AG:AW:MV:AR:WY:DT-EW:HG:EK:CE:FF:HN:HV:FQ:MP:LQ:DQ:DW:RT:TH:GH:HI:NI:FN:CM:RQ:WH:VN:GT:RW:LA:SH:KV:DR:MR:GQ:CD:TM:GA:PT:FL:QR:HW:CY:VV:ET:TR:AG:HH:AN:IE:DF:MM:TQ:WE:CN:DA:RC:QM:MY:PK:PS:NN:YY:HQ:EP:VL:IY:MA:MW:MG:CH:VY:DG:RP:SS:EC:SN:WI:YD:YT:LD:WC:SY:DH:KM:IQ:KY:VD:SQ:SD:RF:PD:WN:HD:VT:HR:LK:FS:MC:GV:LG:VF:GN:QL:PQ:AE:YA:PW:FR:QG:IH:YM:NW:WF:PV:WS:AA:LT:EG:VC:GY:GF:RR:TK:GR:FD:RN:IN:QS:VW:AY:GL:MD:IL:IK:EE:FI:KF:NC:KL:TF:TW:WR:LY:QQ:MQ:SK:LR:HK:VA:CQ:NQ:RE:KQ:NG:WA:HS:TG:AW:VQ:KN:NP:KS:QW:AK:CF:FP:PI:SE:MN:PL:NE:NM:HY:DY:MS:FM:AL:LE:DV:YG:PP:RH:PR:KI:KA:KG:KW:WL:MI:LP:SI:HL:EF:YN:ID:FE:ML:HM:QY:KE:SW:II:YR:CR:NA:IR:GG:ER:AF:VH:YL:QT:YS:DI:WY:NK:IV:FV:PN:DK:FA:DS:AS:ED:IM:SR:LS:FY:EL:AD:GM:NR:WQ:WG:LI:QI:FH:KT:CL:YC:RV:KK:DT:QA:MK:ME:WK:AC:NT:QP:TS:DM:FK:NS:VK:ES:PM:NV:WV:PY:NY:EH:AR:LF:YW:AP:CV:RS:QK:HF:CW:GI:GC:DL:TD:KR:YQ:CT:VM:EM:AI:TL:AV:NL:TV:PA:YP:AH:IF:PC:MF:TN:HP:YK:YF:EQ:LW:SV:PH:EN:SG:GP:WD:WT:RM:ST:RG:TE:YH:YV:KH:GS:LN:VI:KD:QV:MT:RY:CK:HE:IC:AM:KP:PE:IS:QH:RI:WW:HC:GD:NH:IW:HT:LL:DD:VP:DP:AT:NF:YI:QE:LM:VE:EV:CC:SL:SM:IP:TT:FC:TC:EA:TA:QN-VY:TG:HH:VV:WW:RP:IM:FP:DV:GF:NP:TN:RS:MW:AF:CW:NW:LW:AA:SG:ID:VE:WS:QA:LY:DT:SQ:AQ:MI:RC:DW:SP:AP:NT:TI:FF:IV:AM:LF:EF:LM:PS:IE:ST:DR:YM:TC:GN:VK:KV:LN:QS:SF:CL:TW:HG:VG:WK:VA:GQ:CI:IA:TE:VN:FN:ET:GH:PD:RN:QV:TR:IY:HW:MQ:LP:QL:YG:FG:RR:PG:KM:EQ:DP:WR:MP:EW:PP:WI:GE:KH:CM:CG:AV:RG:NL:FI:DH:GV:IP:AG:GI:VH:HK:CE:FS:LV:EN:RM:IR:WP:SV:CR:PW:EE:KN:CD:WL:PC:KQ:QG:TT:FE:MK:FY:ND:AK:IQ:HV:WQ:RW:DF:KT:KS:YY:KE:QH:ED:YK:YP:LL:LH:YW:KL:GT:NH:VT:CP:HI:PF:AY:AD:NK:DA:FQ:TQ:YF:WG:SE:RL:HR:VW:YH:CF:IL:MY:QN:KD:GY:CS:QM:VP:LS:SW:RF:HT:EM:SS:QY:VL:MR:WC:YS:EC:HS:FH:ES:DC:LQ:AE:WV:MS:FR:NS:FW:DN:YL:HQ:EP:SN:RE:RH:ME:QW:TF:EH:TK:DQ:IH:KA:NY:SK:AN:AL:CC:NA:CA:VI:PT:NI:WT:FA:MA:EY:MN:YT:WN:DI:HD:DS:MD:PR:DK:PA:LK:YC:RQ:PH:WH:TD:AR:IK:EL:FV:PI:HF:RY:TL:YR:GK:HE:NN:SH:IW:MT:IF:QI:EA:GW:GL:II:CV:KC:LC:SY:DY:MH:CH:PQ:HC:LE:CN:NC:IT:GA:KP:NQ:YI:QC:QP:WD:NM:LI:KK:DL:QE:VC:WM:YQ:QK:FL:IG:MV:TP:IN:DE:FM:SA:SD:KW:AI:TS:MC:SI:YE:QD:FK:TV:LA:GR:VD:GD:MG:KR:KI:RI:RD:CT:EI:DM:TM:GC:MM:VF:PE:MF:PL:WF:IS:PY:SR:AH:QF:ER:YV:QQ:TH:NV:LD:RT:AC:AW:DD:SL:HY:RV:LG:HA:EV:KF:KY-YF:PE:CR:EN:IL:KP:ET:HM:VM:DS:IA:QE:PC:FE:GK:DF:EI:RY:YQ:LL:PA:HI:NG:DD:KW:QC:FP:WY:YH:VT:IE:YM:QD:RI:PW:GF:IF:HV:DV:VW:TD:CD:WP:MC:LN:DT:IS:HY:SF:WH:GG:LE:TY:VD:IR:MP:QH:YR:RA:MQ:SV:LK:WW:IK:MF:SD:NQ:YS:HS:QL:IH:GP:QR:RC:LM:RN:NT:EP:SQ:NE:TT:MS:RD:HK:NL:YY:NW:WM:NS:LR:FN:EV:KL:MG:QP:KR:KI:RP:EC:LA:WF:QW:EG:PV:AL:AA:CQ:MW:RE:PP:DW:FS:YI:PD:RL:LH:RW:LW:VQ:HF:SE:EF:MM:SP:TN:QI:QG:GL:DP:EE:KQ:SS:GC:HW:VC:FC:MA:KN:GY:KT:IQ:EH:CI:CL:RH:PQ:ID:DI:TM:ER:KS:YW:AE:QN:IM:AQ:QT:KE:PF:CW:QF:VE:HD:GI:TK:NR:GH:LV:AD:DE:FR:CM:WR:DA:RM:LF:FK:ED:NP:CP:HT:QV:GQ:TC:PN:MY:PR:ND:VP:EA:KF:WS:ES:SN:VS:YL:YA:MR:VF:YV:WK:PG:AV:WT:LD:RV:QK:FM:HG:TG:VN:QY:ST:DL:KC:YT:TA:GN:NM:PI:EQ:QS:WN:AR:FH:SG:TQ:KG:MD:TV:TS:WA:GS:FV:TR:KD:IY:CC:CK:SY:VA:CT:AK:SL:LY:GV:IG:GA:NA:RS:VR:WL:WQ:TL:EW:RQ:RT:TH:IN:YK:DM:DY:SH:VY:FT:FI:IV:DG:AS:DK:SM:DQ:QA:II:SI:QM:HL:FQ:YD:YE:EY:EK:LG:NC:YG:RR:PS:MI:PT:YC:AY:CS:HN:GR:HH:NY:CY:CE:WC:CA:WE:PM:TI:AC:DN:TE:MT:CH:IW:AI:KM:LP:SR:NV:VV:VL:TW:NI:YN:LT:CF:CV:SC:IC:GD:LQ:FL:EM:VK:HA:SK:RG:SW:AP:NN:CG:GT:KY:PK:KK:YP:AN:AH:LI:SA:HQ:WG:FW:AF:ML-AE:AW:CH:KP:AF:MD:NA:DV:DG:VN:NG:KA:PF:QI:QG:EN:EA:VR:YS:PY:DE:YA:WS:MH:ED:SL:MR:HR:SA:MC:WP:EV:PL:PP:AP:FT:HM:VY:WV:DF:EE:LK:WF:VG:SV:KE:SP:WT:FN:CM:LE:RW:DC:QW:GV:KK:QD:HL:RT:AK:HD:KH:WQ:KT:VW:CY:KQ:WH:SY:ML:NI:RQ:TW:CQ:IG:IH:NQ:AY:KC:NK:PD:PM:GF:AC:MM:QP:HP:RK:DQ:DS:TI:QR:LM:IT:CF:LD:QK:SR:LS:DY:QE:NV:TC:FF:MY:WG:CN:GS:FC:CS:RP:NM:QL:AL:MK:ME:WK:LP:TE:TK:EC:IK:EQ:IS:MA:QV:RC:DK:FI:PK:CE:IF:FV:RD:DT:IW:RF:HC:VT:ND:EF:NH:TQ:KL:YK:VF:HN:FM:SE:CL:PR:VQ:CA:ET:PH:HS:PN:CR:IP:WA:FW:KD:QQ:HF:QA:QT:AV:DM:TS:FQ:NW:WN:RS:CC:TF:WW:EW:EP:FS:VP:LV:CK:II:FE:IR:FH:TY:SW:IY:LN:HA:TG:YV:RG:VV:VI:WI:PI:AN:YW:QH:GG:CG:EG:TR:RE:PS:YY:VM:LW:IQ:VS:VE:SM:HQ:YR:YF:AR:HV:AD:AS:GR:MT:HI:FR:TT:FP:GL:SN:QN:FG:NF:HY:SC:HW:HE:DD:MQ:QS:IA:CT:SH:FK:RA:RL:FL:LC:DW:TP:GI:YG:TD:MW:DI:GW:IL:RV:SQ:YP:GA:ST:LA:GE:VA:WM:RY:SD:FD:AQ:WL:MG:YE:QC:NR:VK:SG:FA:IV:RN:HT:KY:TV:TL:HG:LH:GK:NE:CW:LL:ES:ER:RI:KN:TH:WE:HK:LQ:CV:MS:KV:NY:VC:LF:KW:IC:CI:AT:PE:GC:GQ:KF:NT:YM:EY:MF:ID:IM:RH:NN:DR:TA:GY:RM:LI:GH:WY:KS:GD:AI:QM:CP:SI:PC:AH:EH:RR:HH:NP:MN:DH:YI:LG:LT:YT:KI:TM:AM:EI:TN:VL-SC:VY:QQ:GN:KN:YP:HY:VS:GE:PS:YM:RS:WT:IQ:WH:YS:TL:YA:AE:AY:PP:VT:WW:NA:DS:EQ:LH:LY:YV:IK:AS:WY:TG:FN:GD:EM:CC:RW:FM:KG:QL:RL:LS:KD:FQ:IA:SN:QW:GT:CL:GF:PH:SQ:TE:MN:KK:AF:MM:EP:LQ:HN:FL:VM:VN:GV:FA:PL:KW:HQ:CM:FS:WK:AD:IN:AG:VI:WS:IG:QT:GM:QP:DE:RI:HW:MA:AN:NQ:KM:EA:IC:SA:AI:HR:HP:WC:RY:MI:EV:WR:RR:TI:PK:TD:TK:TT:MY:WP:EN:WI:TH:YH:QD:MD:CQ:YD:DM:HD:NK:CY:NS:NN:KI:QA:CN:QG:GW:GC:FH:DV:HM:RD:QH:EK:VV:KY:EF:GH:CW:LP:RK:PA:GR:HC:CG:KL:DA:SR:NM:IR:GY:CV:HT:MC:VH:VW:TP:SD:RE:YW:YQ:AC:YI:MF:NG:MK:QC:NY:FY:KS:QV:YG:FD:LE:GL:TN:KF:SV:FW:CD:FG:EG:DQ:LI:PV:TV:CT:IM:LM:LR:RF:VR:SK:FE:DI:IF:PT:YK:KV:DL:FI:VK:QM:GP:CA:NP:PW:SM:EE:KE:AM:DY:SY:PR:MG:EI:GG:AP:PN:WL:YE:FF:II:IY:ND:CE:MV:RQ:MR:LV:HE:NI:KH:NT:HS:NW:RT:TR:RG:GI:YT:YF:DF:PD:WF:LT:DP:HV:HL:WG:QN:LD:TQ:NH:DK:PF:YC:CP:KP:FT:ST:TF:GA:KQ:CF:WM:HI:IV:ME:MQ:SH:RC:ER:FK:EW:MW:EC:FV:MT:ML:MS:GK:CK:FR:LN:QY:LK:TA:ES:AQ:VP:QR:IE:EY:RP:FC:SG:TC:LC:AR:IT:SI:DR:LG:HH:KT:TY:LL:WQ:VF:DG:ET:PQ:AK:IW:EL:FP:TW:PC:LW:NF:YL:KR:PY:VG:HF:DT:RN:QF:DD:IH:GQ:CH:LF:RV:YY:CS:PM:NR:IS:QE:DC:IP:SE:KC:NL:WD:GS:SL:QI:KA-NS:RM:DT:HN:WP:QW:NQ:LK:DW:VD:WS:FR:EP:YW:NC:HS:LL:MF:TD:HF:NI:PQ:CI:QA:KR:QY:PW:GN:TI:GM:IQ:FH:LN:LG:HY:LQ:KS:DI:HG:DA:PP:WD:PI:AT:SR:QR:HI:WH:AD:TN:IH:QV:GV:HV:ET:NR:RV:HR:QK:FC:SA:QT:MG:MA:KF:YS:GP:VG:VF:QG:NH:KC:GT:IN:TC:IA:AR:RK:GS:HC:CF:PE:ML:LM:IK:GA:KT:HM:SE:NK:VI:PM:CN:AF:KD:TK:PC:NG:KW:MQ:QQ:PF:RS:MM:LV:MN:SY:SL:DR:WI:QL:PR:TT:CW:YY:MW:PY:VY:DK:EK:YN:ME:RE:NW:VN:GD:VH:FW:SN:CP:RD:TR:SM:TH:PK:PG:KL:EW:RN:VE:IF:HW:YG:MH:RL:AV:PT:WL:GR:HD:CG:LH:KQ:SG:ND:DE:AM:GF:TW:QC:KN:KH:LF:LC:PS:SW:LS:KE:NF:TP:DY:KG:PH:WG:IL:PL:NA:GW:KY:ED:IT:IV:VQ:EC:TQ:EM:AS:TE:YH:FN:MC:FG:GQ:FT:HH:IP:IY:TF:HA:FP:FK:DV:SK:GI:IW:DM:CT:WA:SD:TY:QD:LY:KA:KP:SC:CY:RR:PN:CQ:MI:MT:VK:CC:WW:AE:AQ:RG:WF:YF:AA:IG:QN:LI:RA:DC:KK:AP:VC:CK:NN:DS:AG:FD:WV:NM:YQ:MK:DL:QE:AL:VV:RC:YI:MD:IM:IC:SF:RQ:AC:EL:MS:DD:AH:YT:SP:PD:FA:FY:LR:SV:GL:WK:RP:VW:HE:DG:AW:DF:PV:RT:KV:YD:KI:CD:IE:AI:SQ:NY:TM:SS:LT:WC:FQ:NV:CA:TA:RF:ST:IS:EH:PA:YV:VR:HT:ER:WE:DN:DP:QM:NE:MV:FV:ES:TL:LW:FF:YM:YK:GG:LA:CV:FS:RH:DQ:QI:WT:FL:YR:GC:EI:RY:GE:QP:TS:IR:LD:WQ:KM:LP:DH:NP:EQ:YP:MP:WN:LE:EG:AK:RI:EA-LF:TN:LS:LR:VN:MN:YQ:QM:LD:LE:RY:AN:VL:GP:QH:DI:EA:HI:FN:QS:TL:WP:DL:DM:IF:TT:HW:CE:AL:PR:ND:HR:WL:YY:PM:TF:GR:TM:AP:TD:AD:MS:QK:HF:VD:HT:QW:KA:VE:FM:VG:IT:NL:NG:IE:CD:NT:MF:VR:MI:SI:GG:PA:DK:TK:YG:EM:CR:SY:QF:MK:FP:SE:TG:EK:MM:WS:FS:GT:LK:CF:RR:ID:KR:CV:QG:TR:QL:PN:NC:CT:LH:MD:NE:EH:FW:LW:WI:YV:FV:SA:GQ:FY:LT:GD:RH:YM:CL:VW:AQ:FR:MT:YN:CN:VC:LN:LV:HE:EI:FL:CI:HP:NI:TA:PG:AF:SN:EN:LY:YK:PS:KF:IK:MG:YE:TE:MV:NK:GF:EQ:VT:AH:NW:SP:PF:QA:FC:SR:MP:CM:RG:NH:RA:CG:CW:HM:RL:PL:FH:PQ:QI:HY:GK:IR:NR:LC:NM:SV:EC:RM:WD:TP:QY:YP:QT:KV:FI:GN:VP:FA:DY:TS:GC:LQ:AW:PW:EP:RQ:AK:ME:CC:IA:ST:IN:PD:LP:WT:MA:GW:KL:KI:AM:WH:IQ:RF:IL:KG:KC:CK:HH:EV:KK:KN:RP:KM:CY:SQ:DD:EF:DR:HG:WK:ED:EE:WN:VQ:SW:LM:TQ:YF:VF:MR:VM:DW:LA:DQ:QC:YT:SF:FF:AG:WV:PY:CQ:AV:YL:DH:RS:RN:SL:HS:AI:CA:WC:QP:TI:RT:ML:PV:CS:GY:QV:HQ:DT:ES:KD:NV:WM:IP:YD:MW:VS:RI:PT:HA:FE:RW:MY:SC:YI:FD:GA:CP:ET:FT:RC:HD:II:AR:TC:QR:SH:IG:RK:FK:RD:TH:KT:EG:GH:IM:WQ:DV:VY:IC:PP:WY:GE:HL:DN:PE:YW:QN:AT:KY:IV:CH:NF:NP:KP:FQ:EW:WG:DS:YH:SM:WF:RE:ER:YR:KS:HN:TW:DE:YA:QQ:AC:KW:TV:KE:NS:KH:DF:NY:GI:IW:PI:YS:NA:QD, num_identified_proteins=5502\n",
            "27.0%\n",
            "num_binder=15, num_dipeptides=350, sample=0, binder_set=FD:YR:HF:TD:HS:HM:GQ:MF:FI:RY:AA:AD:LE:MW:NT:KF:IP:EW:NI:SS:QF:MK:FH:CV:KC:VH:IS:AT:TN:SA:WT:IV:NA:AQ:RA:RN:LT:HK:LI:RT:IC:ED:CD:TK:PI:FT:WW:LQ:SP:PF:LF:NP:DK:GM:LK:AK:TW:GC:HT:HY:DR:NS:NE:SY:NN:FP:KV:GI:DH:EV:AG:QK:EE:DE:IN:HA:DS:ER:QM:AH:RP:DQ:NM:HN:YA:KA:VI:PY:SN:FW:EF:CG:YT:KG:ME:LL:YE:SR:KL:PT:EK:CF:SE:FY:YN:KN:GK:YS:TH:WP:WM:NY:KK:PG:GT:PP:DV:QN:LM:YG:NQ:AR:SM:RF:KS:LC:FK:CE:MG:KE:MA:NG:SW:VL:FM:LV:QR:LY:PS:VM:FR:AI:AP:AL:WG:WV:PW:KT:NK:IA:NW:LA:IR:ET:GV:WY:DN:DA:YV:EM:SD:PV:HV:VN:QT:WS:RL:MH:KW:TE:KM:SI:HI:QY:HW:KD:NV:RH:QG:VW:FA:GL:KY:TV:DI:EN:AM:QC:AC:NH:YP:RM:IQ:CW:YM:CI:CL:EA:AY:MT:SQ:PD:PQ:HH:LR:YC:IH:IK:MS:TF:SG:GA:RR:QP:MV:EY:PE:GW:WD:YD:WA:NF:YW:MY:ND:HC:GP:LS:RW:HE:QV:NL:PK:IW:DY:LH:KQ:FC:VG:PN:ES:VP:CR:IT:WK:RI:TC:TS:TQ:WQ:TT:QS:LW:FS:II:KI:PM:PL:RC:HL:QL:IF:CH:MM:GD:WC:GH:AN:MD:PC:DF:FL:WH:IE:YK:VQ:ML:MR:YF:VD:WI:ST:FQ:RG:CA:VS:DD:QE:QW:AW:SC:AV:GS:IG:YI:RK:EQ:FG:LP:GG:VF:DG:GY:TG:EL:VC:CM:RS:DP:AF:QA:PA:IL:CN:TR:TY:DL:TL:CC:CQ:GN:HD:HP:HG:TP:WN:PR:SF:QI:MI:KH:VK:DM:TM:DC:EG:PH:DW:RE:MQ:LD:KP:TA:VE:MP:CY:YY:QH:IM:NR-MG:LV:GR:LW:CT:MQ:ED:PT:WT:QF:PW:VC:MH:WL:YH:PV:QE:GA:IE:IV:WI:VN:TM:MS:LC:GT:KD:IY:PY:HP:RA:SC:AE:GM:HQ:NV:TN:DV:HG:FD:KN:VP:RC:EH:IW:PE:SK:WK:AC:QS:CP:PG:QW:CL:WN:LM:LN:QR:AS:HS:EA:YV:PK:MT:CY:AA:YP:DR:TR:MI:RI:IL:GP:DC:TQ:PN:IH:QA:GN:MC:AD:HY:QY:NY:EK:VG:AF:HC:AR:VQ:LS:WQ:GD:RY:GS:PD:ES:SF:ET:LR:AY:RR:TL:NF:EG:TS:CQ:DK:PL:CS:AQ:WG:WA:YQ:NN:FV:RT:DW:PQ:YD:HV:TA:QD:KV:FA:MV:WS:HN:TH:HL:KI:TW:VW:DN:CK:AK:GK:SS:AT:MF:ST:PS:MY:VA:PM:EF:SY:TT:RG:HE:WY:TF:YI:PH:SH:FW:QP:NL:NP:DM:KY:NG:HD:VT:II:IT:PF:YA:RN:RS:SV:RP:YE:YW:RV:DL:YK:DY:AN:EC:WE:CR:IG:MP:AG:IK:GH:RF:NC:AL:GI:VS:RW:QK:DQ:FT:DI:KK:LH:CM:AV:IQ:TD:YC:FH:TP:SQ:ML:RH:LD:DD:KA:SL:TI:PP:FS:EL:QC:FQ:FY:DS:CN:HT:KQ:CE:IM:VE:KE:KT:CH:TE:NS:GW:LA:CC:KP:SR:WP:NW:EY:FP:HA:TC:EW:KW:QL:RD:EM:YL:ER:HR:GC:FR:PC:CD:SP:GY:DA:TV:QM:ID:AW:DT:LG:NA:SI:IP:YT:QG:HI:PR:FG:CV:WW:KC:FL:VH:WD:LF:QI:YM:EN:SE:YG:RK:DH:LE:PI:GE:CG:CF:MD:YR:LP:RL:GL:LY:VR:NK:HF:LL:WM:LQ:NI:CW:DP:YY:CA:NR:QV:KR:IN:HW:QQ:RE:FM:EV:FN:VL:HM:MW:MR:LI:NE:SW:AM:HK:IF:NM:GV:ND:VV:SG:DG:VD:AP:NQ:VF:EQ:YF:IA:KF:PA:RQ:NH:GF:EE:LK:VM:WR-WD:LA:TG:MI:LF:LT:AH:AA:SD:WW:EH:VA:IN:ED:IM:DL:AD:LP:WG:IG:YS:YE:PA:VY:II:LG:AQ:QF:NR:LV:CQ:YR:FC:IV:VF:IA:LC:AN:PE:MR:DT:MY:RF:NP:DQ:IP:EV:AK:VP:TS:QW:YN:QA:YQ:SH:YP:RR:DC:ID:TY:TF:AL:PV:KM:HV:MD:SS:YG:FT:HK:HY:WA:LH:EE:CP:FL:SF:WH:SC:MQ:YD:AG:QT:AS:VT:NI:FH:HT:GN:NV:AY:CL:NG:RC:AW:DI:VL:VW:FM:RL:GD:QI:IL:YL:SW:FK:YK:CC:PY:WM:IF:NE:KC:AI:SY:DA:LK:LQ:QK:QH:HD:ND:TR:QN:PS:HI:SK:DR:VS:TK:TA:TV:YF:NQ:PW:FG:RQ:WR:WI:SM:YV:NT:GQ:NH:DY:RK:SR:GS:SP:FP:RN:TM:FQ:HL:RA:FY:FE:VE:QC:KK:GH:RE:EA:AP:YY:VN:EI:HM:GW:QG:WN:PH:GF:WL:MT:KN:WC:NA:WE:YW:RW:NY:QE:GC:GK:CE:MP:LL:DW:QY:HA:DH:KW:TQ:KT:FS:IS:DG:NL:IK:TC:KI:FW:EL:WK:EK:IC:EY:PN:MC:IH:AT:HG:LN:NW:IE:QP:RI:FA:IW:FI:CV:GM:TH:SL:CI:GA:KR:KD:HN:RS:CH:EF:TT:HC:EM:DN:IT:RH:HF:ES:YT:EG:VC:QV:MG:IR:CK:CS:KG:HR:IQ:ET:GY:GV:QR:VI:GP:YC:DS:SG:IY:DM:AF:FV:PM:KY:GR:EN:TL:PF:WP:SE:QL:ME:ML:VV:KA:VH:PL:AE:PG:WS:CR:LY:TP:AV:DD:KF:VK:TE:NC:PI:RY:CF:SN:WV:HW:EC:AC:PT:LE:NM:QD:ER:PR:KV:YH:HE:FR:NF:PP:LS:CY:VR:KL:RP:LI:VM:DV:QM:RV:TI:SI:SV:PK:RG:GT:MM:GI:VD:NK:DP:GL:CN:TW:MA:PD:RM:GE:QS:FN:YM:EP:FF:RT:VQ:FD:KS:DE-DC:LA:VA:ML:SS:KP:IK:SE:CH:AG:QW:KH:NG:RG:QF:MN:HE:YM:YN:PP:QR:VN:ED:RK:WP:VS:IR:WM:PV:ID:WW:HQ:KW:HW:NE:TK:AW:EY:QL:GV:HA:HF:DS:CL:TL:GP:HG:SN:LL:GE:NP:CW:SP:LQ:VF:MK:CC:EQ:DY:PN:DM:AE:NY:PE:AP:YQ:MI:RC:AC:HP:YI:SV:EA:RT:SQ:KD:MD:WN:LS:MR:PD:QI:GF:EE:TQ:YV:GM:RQ:QY:WR:SC:CS:WY:WL:VR:LG:LY:PL:PC:NI:PK:TF:PM:VD:WA:EI:FF:ER:KR:KA:PA:NV:CP:YP:CA:IH:GN:LW:HI:QS:FG:QE:QH:VH:EG:IP:PI:FV:HD:LH:LP:FA:EV:IG:MS:RW:SI:NW:MM:TN:IV:VK:FW:RY:TH:VT:TC:AD:IA:LC:FR:WD:TP:CN:RV:YW:EL:HV:QQ:KF:AH:IM:DA:NC:QD:RM:MH:YL:AR:LR:FT:AS:KV:CV:SR:RH:CF:RR:RI:DG:MT:TG:GS:TS:GT:NM:FM:YD:VM:GL:IY:KT:MV:LE:ST:LD:YF:FI:HM:VL:TV:SY:QA:MY:KL:RL:PY:GD:VP:AM:EP:SH:CD:QN:GH:MW:DR:TA:NF:YY:RN:AQ:TI:WC:MC:IE:KE:WG:DK:DQ:MG:RD:DL:HH:SW:IN:SK:ES:CR:GW:AL:AA:YC:MA:GG:IW:AK:PT:MP:RE:PG:RP:NK:KC:PH:CI:NQ:EM:NR:WV:LK:FS:IF:CG:NL:FH:EW:TW:HL:FE:QP:LF:VG:TD:GI:QC:HY:WF:NH:AF:GY:CY:YK:WQ:DH:YS:HS:AV:KG:PS:GK:ET:EF:EK:DV:TT:SG:SM:IS:HK:YA:LT:IL:DT:WK:EH:DW:WT:TY:RA:LI:AN:WH:ND:YH:KN:FC:RF:LV:GC:FY:CT:WI:MF:DI:KK:TE:PR:QK:DF:MQ:QV:FD:HN:NS:CM:FP:DN:RS:VI:IC:IQ:GR:GQ:YT:QM:WE:TM:DP:TR-EH:QS:SC:WW:DQ:QT:RY:VG:LI:KL:ET:NK:MG:WH:WE:RE:LQ:TN:SW:AW:EL:FE:WR:AL:GM:MQ:NY:FY:AR:PC:DW:RT:PQ:VP:HL:IA:QG:FF:GG:NI:EM:NF:RA:CA:TC:PA:TH:EF:GF:SM:DS:YM:PK:PH:HT:QK:YY:MF:CM:WY:CS:DV:MV:DG:WK:GT:NS:TD:KF:AG:CI:IS:SQ:EI:MW:WT:GD:KV:HG:NE:KK:HN:KP:PD:YD:CW:MD:DN:NP:SK:KA:RR:IK:SR:KC:MS:SS:KN:TA:RV:SD:VD:DK:RP:RD:NC:CF:NV:NQ:NG:FG:WP:ML:CL:NR:PY:CY:RL:FM:EC:YL:QA:DF:ID:IQ:WS:VL:LH:YF:RG:LK:GK:HI:LS:GN:GA:TI:DD:WA:KY:EY:CN:VN:HC:KR:CG:AF:IM:ED:YI:FT:DY:SY:YT:GR:FQ:PR:HR:VM:TT:PW:WC:QE:AC:GS:PF:YC:IC:TS:QY:HS:GC:YQ:PV:AI:YK:HW:CH:IH:CK:CV:WL:VE:LM:TM:DM:IN:HQ:QV:LN:TQ:GV:KM:VY:PP:HP:TF:MR:TV:CD:QL:LG:MM:QH:ST:CE:KW:IY:EK:RK:EN:ME:TP:GH:MT:LF:DT:DL:AA:GW:LE:VR:PL:QP:FD:QF:RH:YR:KH:QI:AK:SF:HY:IE:KS:WG:KE:QD:EP:DI:WN:QM:HA:RN:RQ:NT:MC:VH:NH:EA:VF:CR:FH:TL:YP:WV:WI:FC:MN:LY:CC:NW:TE:KT:SP:NL:DA:IF:DC:FR:YW:DE:TR:VQ:YE:PE:CT:PS:HH:VW:YV:PI:WD:PG:HV:LP:AS:VK:LA:EE:LL:IR:MP:ND:LT:TW:FL:SI:FA:PT:SG:CQ:VT:SN:AN:TK:MK:ER:AD:GI:GP:GY:HE:DH:HM:MY:FV:FS:YH:VA:PM:CP:LC:IG:WM:KG:AT:LW:HK:YS:AH:LR:AV:RW:QR:EQ:IV:QW:YN:SA:VV:MA:DP:LD:SL:VC:RM:YG:KD:KQ-IM:NG:IH:FW:CR:VD:QF:PI:FM:WM:LR:SK:HD:FS:EG:HE:VS:PP:CH:KL:IV:SM:KW:ME:RF:WW:SI:WT:SA:IF:NW:RQ:HH:LS:CE:FK:DD:SP:SF:EK:RI:TP:WD:ML:WK:MN:MS:DN:IG:TC:IR:ET:KT:CT:RM:KA:VN:TE:SH:IN:GI:HA:GC:IQ:MQ:MI:SR:RE:CC:DT:YN:GW:DI:LT:WF:QK:RA:SC:WY:KF:KE:NP:LW:ER:NI:CN:TM:QY:CS:NC:QN:SQ:YH:WE:NN:MY:AN:EI:NY:LL:CD:WQ:VE:LQ:FF:LH:HL:EL:ST:AQ:PT:KC:WV:PN:SN:KP:PV:FN:RH:YI:NL:DV:AE:NS:KQ:QG:MK:VL:VP:PE:TH:DA:NF:MV:GF:MM:GN:QW:NE:IP:HN:HQ:RC:SV:AV:FC:DE:TI:QA:PD:IK:TT:HR:LC:DP:MC:YT:QL:SG:IS:VH:QV:EW:RD:CL:VR:DF:VY:WS:NA:CP:YR:TW:WP:TR:CM:FH:VV:CY:FI:VI:GY:TL:CW:SS:HW:HG:SW:MA:TQ:YM:VG:VK:VM:IL:FV:DS:MP:GV:MD:EY:NR:KI:AF:EE:LY:WA:HM:WL:YA:WR:YE:CA:PC:KM:LD:NM:MT:GM:RL:IE:GQ:KS:LV:DY:RN:EM:GA:YS:AG:DH:RT:CK:ED:KR:TF:EH:LG:PR:NT:AP:DR:IW:SD:AA:HK:AS:ES:FR:HP:CQ:PA:DQ:GD:SE:QS:GE:LA:QR:WC:WG:KD:EC:FL:GT:KH:IC:PG:RS:MH:TS:MR:LF:TN:GG:QC:NH:NV:AI:MW:ID:AT:AH:AW:DG:DM:KN:VQ:RP:HY:SY:CI:WN:FY:PL:ND:LP:HF:EV:GK:FE:QE:QI:KG:FP:II:YK:NK:EQ:AR:QD:RW:KY:DW:CF:RY:RG:HS:PY:GS:TY:QT:WH:LN:GH:QQ:CV:CG:AD:EF:LK:AK:YL:YG:PF:GP:VT:TK:KV:DK:YW:MG:IA:HC:HI:RV:NQ:VF:VW:AM-SA:VF:KC:QY:YV:CK:RS:WT:AF:YM:LF:WW:PW:NK:AD:YY:QR:FL:IF:NG:RI:AI:QM:RK:ED:TI:PV:VM:WC:YK:VD:QV:HR:SE:RV:WP:SV:YH:NI:HH:CP:HV:FW:SD:ME:KM:YC:WM:CY:HC:PG:FM:DE:FN:CC:QS:EG:FQ:LM:AY:DI:IN:LA:WI:EN:RC:ID:EV:WK:YQ:NC:GC:IW:MM:QE:TF:TG:HA:DH:DL:SL:EI:EH:LK:KV:TV:IS:NE:NR:VP:ML:IE:WN:PK:DQ:CT:FG:ND:LP:QD:FD:TL:GF:GE:IR:AP:AQ:MK:RD:WY:DW:YI:EY:QK:SG:TR:WR:SP:CS:TT:MH:QT:FH:GG:KL:MI:CE:GL:NN:DG:SC:CA:YD:KE:VL:FE:VC:WQ:CI:YG:PC:YE:AL:VT:LT:GR:HP:RF:WF:LL:SM:KW:HG:TW:RQ:LH:GQ:GA:VY:EQ:HS:VK:PM:RR:GW:SH:IL:MW:WS:PA:LG:IP:RT:TA:ER:IG:DD:VA:GI:FV:HQ:DM:RY:PI:VV:HE:CL:SW:IV:EF:NA:RN:EA:HT:HD:NP:NH:HW:YS:FC:QW:QI:MR:GP:CN:AA:QN:DV:VN:RA:SF:NS:CM:FP:DP:KR:YF:KS:AK:CG:VI:SQ:WG:PT:CR:LS:WE:HN:NM:NY:LV:RW:LC:TM:GY:VR:FT:IY:WA:SN:AE:PN:AS:VW:KF:ST:KA:PQ:MG:FA:KN:IA:TP:IQ:KD:HM:LY:QL:TN:MD:ET:VH:GN:MP:HK:AM:CV:LE:IT:DK:TE:YR:FF:YT:WV:CD:AV:EP:YW:PP:DN:RL:KP:MS:EC:MY:KT:DT:KI:SY:KY:PE:KK:DR:RM:QQ:PS:MF:AW:TQ:MV:QA:GH:SK:YL:WD:CW:TY:EW:MN:KH:RP:QG:HI:AG:GV:LW:QP:KG:AR:EE:TC:AC:IM:WH:NL:TS:GT:AH:FY:HY:LN:PF:GK:EL:CF:GM:VE:QH:GD:RG:LD:CQ:VS:FS:PH:DA:PD:SS:MC-YF:RH:QR:AE:IA:QH:HA:IT:FH:TC:CN:PW:QY:YN:KM:SW:DT:LI:DR:KC:FK:TQ:QQ:PF:LK:HP:FS:SY:QE:FE:DA:FL:WI:DM:RI:SF:SH:RT:FY:MD:FC:WT:WR:CI:HF:AT:SI:IG:PS:DG:TI:GS:VI:SS:SV:ML:WY:CT:NM:TS:GY:KK:AL:KS:GT:MP:DY:QA:PI:GW:PA:VC:DQ:FI:MT:VA:AP:TT:FT:RD:ST:LF:WK:CD:YL:WQ:SG:VG:NE:NL:ID:AN:SM:PM:EE:ND:AV:CS:YA:AQ:CC:HY:WP:KD:VP:NC:LD:LL:LA:VF:TP:SQ:ME:MM:YR:WV:TH:HC:EG:YK:II:EM:CA:IW:DF:IF:IR:VD:IS:DP:QP:QW:QM:NG:KW:NF:AG:PH:KN:CG:NK:GR:DS:GI:YM:KA:AK:DD:DV:LV:VH:PN:MQ:LC:MH:LM:TF:IV:LW:HI:IK:PQ:FF:SC:QI:TA:HD:KI:CW:LH:LG:IH:RL:AS:WF:RG:MS:TL:PT:NN:NQ:MV:IN:GH:RM:RF:EF:KG:AM:PC:NW:RA:QS:FD:CM:RS:HR:KH:HE:PV:NP:YH:WM:IL:CQ:PP:IQ:NY:TW:ED:LR:LS:MC:ET:HS:YG:VN:EK:KP:KV:CV:HV:VL:FG:MF:TV:DK:HM:VY:NR:VQ:KT:CE:HW:KQ:QL:IC:CH:AC:GD:WW:WA:TY:SR:SN:LT:QT:LQ:FN:WN:FQ:EC:VT:EL:IP:SL:EP:CP:VM:PG:NS:NV:GG:EH:GK:PD:VS:LN:DN:HN:GN:DE:VK:QD:HT:CR:QV:YS:QF:KR:CL:TR:NI:WC:GA:YV:RQ:RV:IM:QC:WS:ES:EY:MY:YD:RN:YI:PK:HH:FA:CY:FP:EV:YP:FV:CK:SE:LY:HK:YE:EN:AA:NT:AI:YQ:DW:DL:VW:SK:TK:QK:MK:CF:RK:RP:ER:PL:LP:FR:WG:DH:MW:VV:DC:FM:VR:QN:LE:YC:AH:MR:MG:VE:HQ:WD:YW:RY:KL:EW:RW-GY:WE:GD:GR:TW:MV:EG:GH:PT:VF:SN:YR:DP:RY:VN:NI:FS:NK:MQ:AY:TC:VQ:TF:TR:LL:DC:TH:DL:FF:LR:MF:VK:HH:VE:II:AN:KF:WC:GG:RF:DM:SQ:RW:TN:AA:SA:NL:WG:CI:SY:PC:KP:QL:WR:KI:VW:GK:QN:RV:DG:WD:FV:AK:LY:YE:EP:RA:FQ:IL:TL:MI:PR:DI:PL:SM:CH:DW:PN:EF:VR:YK:IC:NN:MG:QY:GN:VD:DE:SS:NR:QF:QD:EV:YD:CT:QR:DY:KS:QK:NG:VP:SE:KV:ID:HY:DK:MT:CL:RE:DF:YM:FL:EW:LI:RH:RG:QS:RQ:YC:HI:PH:HW:PP:TK:VS:SI:QE:NA:HS:YV:FY:YP:ND:WA:IE:KQ:MY:ST:NC:CP:QQ:YF:VT:HG:FG:CM:MP:ML:YW:LE:SC:PA:CG:ER:YA:FE:YT:HD:KD:FN:PK:KE:EY:KK:TP:TG:MR:YN:WT:RC:PE:SP:CD:EN:YH:VG:RM:MK:LG:EA:SW:KC:SL:NT:CS:NM:EE:HK:FD:RN:KM:KW:LD:FC:MD:WW:RD:IK:KT:CV:QG:NS:QH:AL:CW:FI:GF:SG:GL:AR:KL:KH:DT:AG:HA:FM:IN:KN:TE:PM:TD:DD:CY:VH:WQ:IA:YG:TM:HM:AC:KG:NV:IH:QI:IY:VM:VV:MW:GV:LA:IV:LW:LP:RI:PS:AT:EH:NP:CN:IW:VA:VL:MH:HV:QM:LV:IT:ET:AV:EI:WY:FW:TQ:SD:RT:GQ:GT:AS:TV:CA:WP:WV:DV:CK:QT:HC:GA:DA:EC:VC:DS:QW:KR:QA:TA:RR:NF:NQ:MN:GC:YQ:AW:SF:LF:EK:PQ:WN:DR:GM:FP:NY:CR:PG:ES:LC:EQ:TS:ME:KA:GW:HT:FK:LT:WM:DH:HN:AQ:ED:SK:PW:QV:WS:HL:HF:LK:IR:CE:CC:RP:LN:MM:GE:HR:GP:RL:NE:KY:MC:QC:LS:PY:NH:YS:AE:PV:IP:FH:IG:AP:IM-EA:VD:FW:AA:KG:CD:MF:ER:TM:RV:AF:WY:ET:PR:NC:EF:PF:PA:LA:MN:CQ:EM:HR:EL:NF:GW:II:HV:EW:GF:HE:AI:GD:TS:FN:NT:YH:RM:LH:PD:YM:QS:AS:YD:IP:DF:TQ:EE:PI:LY:KN:MW:MA:AE:VR:MT:QQ:CE:EH:IR:AK:MR:HL:KC:TR:QC:VQ:HF:RH:SQ:YW:CW:DV:HM:KF:FS:NR:GI:FQ:LV:RL:RD:YY:VL:VG:VI:HS:EC:KY:EN:NQ:RT:EY:RS:GL:IN:QT:GV:KR:TC:QW:GM:TF:KM:LR:WA:VH:HK:AM:HD:GR:WH:AQ:FK:AV:ID:YA:GN:HQ:HH:CK:LI:DG:IY:SD:HA:NK:EG:MI:CI:NS:SA:WI:YG:NA:IG:EK:MV:SH:KA:KQ:CA:LG:AW:FP:VC:NW:VY:IM:TK:WD:PC:ND:RF:MK:RA:WF:FE:CS:HG:DI:GS:LL:ED:QH:SE:WK:PE:YL:CR:QF:WM:YV:DQ:MS:FH:LM:SY:KV:WR:CC:HY:NI:FD:NY:RI:IQ:LE:PV:QP:MQ:TE:QV:PN:GP:YF:TN:DM:RN:PH:VE:FL:RC:WC:LC:CV:VF:CN:QD:YS:LK:RR:SI:QM:SN:GE:DH:IW:TT:TP:SF:SV:QA:ML:HN:RW:AN:GH:PW:CT:TH:PY:KD:NL:LS:AL:KP:WL:NN:SR:DK:HI:IV:LT:IA:ME:IT:VT:PK:PS:AG:WE:EQ:GT:SL:LD:TA:QN:FY:LP:TL:PT:HT:RG:LQ:CL:CH:GK:KL:SW:FT:EP:NP:SG:LN:ST:FC:VA:GQ:NG:PG:CM:RK:NV:VV:YN:FG:YP:MG:QG:KI:MY:MM:TI:YQ:FA:AC:TV:FR:DA:MC:DE:AP:HW:QK:RE:LF:VM:WG:IF:TY:SP:CF:GC:VK:KE:NE:IE:VW:DR:VN:QE:WS:GG:IC:TG:HP:PP:PM:IH:WN:WT:FV:YT:HC:YE:DN:NM:CG:KS:IL:DP:FI:DW:IS:RY:YK:WQ:AY:AD-KP:VF:NL:DE:EE:WT:MY:IN:DY:FH:MM:AI:AL:ME:PI:AS:NQ:DG:IE:FE:LW:YS:YY:IF:TM:HD:IQ:CE:VW:GD:YE:GW:SY:NA:MR:YD:FW:GK:KF:DC:HP:PR:DH:CF:MN:SW:RF:ID:QV:GE:EQ:TN:KQ:PG:PK:FT:AC:IW:MF:QD:MQ:LM:PL:HN:PF:EM:DI:PM:QF:HR:SA:TE:HH:HE:VA:VG:AA:NK:VI:YP:WE:CL:CY:AT:PW:AF:VQ:QM:II:PA:QY:YA:YN:VL:NC:AN:DT:YG:VV:LI:RY:KN:WS:CA:FI:KI:TY:PP:GH:GY:SR:QL:SI:IY:MW:SM:PV:EV:FY:TC:WW:IG:AY:QQ:LA:TF:AR:WM:MT:CM:RW:MG:MV:HA:TK:KA:TH:DV:FF:LT:WF:WA:VP:IP:NH:SL:HW:VC:FC:TA:HG:HK:MA:EF:VK:DK:LL:RV:KK:PQ:LG:WN:VT:PE:YI:NR:SH:QE:LC:WQ:KC:VR:GP:NP:SS:TW:DQ:HI:TG:AG:GN:GC:QW:QK:WV:GT:CQ:QP:CI:HY:TL:VM:CG:KM:DD:NG:SP:YH:ST:DP:QN:HT:IR:YR:WP:VS:GL:HM:KH:VE:LV:AE:ND:YK:EY:IM:QC:LS:MI:NS:WK:TR:IL:DR:ML:SN:RR:RI:AM:LF:YW:CP:PH:SF:LP:YQ:SC:NW:RL:MS:GI:CR:PN:MD:RS:KL:GF:YT:CT:IH:EP:DA:FL:YV:CH:KW:FS:LK:NT:GM:IT:RQ:KY:AP:TS:PT:ET:AW:NY:EK:EA:WD:SE:HV:SD:CS:NI:IC:QH:EG:RN:RP:AQ:MH:DW:VN:FP:LQ:DF:RA:DL:CN:LH:NF:HL:VD:GS:PC:EC:WL:ER:LY:PD:QR:RE:CC:NM:TV:CD:QT:DN:GR:TT:RD:NV:SQ:WI:RM:AH:WH:CV:FR:FM:FQ:KD:EN:RH:ES:WG:LE:SK:KR:KG:LD:AK:YL:EI:ED:YC:MK:FV:RG:SG:YF:TD:GG:QG:CW:CK:QA-LI:DF:LF:IV:KN:MQ:YL:HE:LQ:GC:ST:WC:TH:ML:ES:RG:AR:RR:TR:HI:MV:YA:QW:MS:PD:QR:CI:NW:YK:LS:GW:TI:VQ:DI:QY:VA:IC:PQ:HQ:WL:HW:PW:NM:MW:PG:CM:TE:LW:PL:WS:ID:HP:IW:HD:DT:DL:GE:FR:FP:IQ:IS:WY:II:NE:NA:DD:CT:NI:DG:QP:RC:EY:KV:SM:CN:VK:TV:KM:KW:PS:QA:NG:EA:SN:VT:HR:EN:LM:RL:IN:ME:QH:WI:FW:QI:LL:AF:DP:DW:HK:GR:RQ:RI:MA:HS:RS:QK:AV:KA:CW:NP:IY:VE:PK:GG:GH:AM:PT:YW:PF:RA:EL:DK:HG:NR:AG:TK:EV:YN:RF:MD:HA:IE:VS:CP:AY:NS:NH:IG:DC:WP:GN:QG:CG:MM:SP:MG:AT:SR:GI:AW:DH:FC:HM:LA:DN:RV:DA:NL:SE:GQ:YC:YF:HY:IL:IA:EM:MC:NF:WH:TQ:MF:HV:EC:SL:SD:VL:GA:IR:MK:ED:TW:YS:EQ:VY:DV:SY:VR:WG:NQ:VC:EK:LK:MI:RE:RY:WE:QS:IM:NN:WD:YD:VH:MR:IK:VN:IH:LC:FG:AE:CA:QQ:KP:AH:CC:RP:IT:WR:YH:HN:NK:FQ:RT:DE:WN:AD:FF:PE:KE:YM:SS:LR:CS:SH:SW:GM:VP:ND:QV:GK:FT:KH:AK:FM:WV:TS:KS:KC:FI:KD:TD:KL:CH:EE:LH:SI:FE:DS:LY:QM:LN:KQ:AS:WF:KG:VD:YQ:KR:QF:RK:VW:CK:PI:IP:YR:QL:GV:FH:CE:TC:PA:YV:NC:DM:LT:KK:KY:MH:FL:NV:MY:DY:NT:SF:FV:GP:PP:PH:PV:LD:CV:SK:LV:SV:DQ:SA:PC:SQ:WM:YG:VM:HL:PN:MN:CY:FD:GL:FA:VI:GT:QN:EF:TL:GS:GF:TG:EI:AQ:PM:YP:RD:SC:VF:VG:FS:MT:ET:WT:EH:GD:AC:TM:MP:CL:CF:RW:YT:RM:HC:RN-HE:CS:EP:PG:LG:VT:DN:QE:CQ:QL:SG:YC:EV:RS:LN:LM:QF:RI:TK:ID:YK:KN:PM:GD:AA:KD:HH:YR:VI:YL:IG:KH:FW:PK:SR:NH:VN:TE:MW:HA:KC:DP:VW:CR:VV:DK:VG:WQ:SD:EN:EI:WA:EM:KI:WP:LD:HL:ST:MS:IL:VY:TH:GL:MF:PL:MR:LS:QA:CP:RL:RN:TP:FP:HC:DQ:AP:HD:FN:HQ:EL:CD:QI:LF:NY:RV:YM:LV:TI:AQ:QN:FH:CT:MN:RA:AI:II:QS:ML:VQ:HI:VD:GT:GG:EC:AR:HG:TN:FV:EA:TC:IF:WE:AV:AK:SC:FY:RW:SP:NT:DI:NS:WW:GN:IW:VS:PQ:SW:DW:VC:AS:QW:QY:RF:PW:TT:RR:TW:FA:VA:GW:EY:TS:AG:KW:CY:AN:LR:WT:MY:NP:HP:WM:RT:EW:HR:DV:TV:RQ:EQ:SI:IQ:IN:NG:GE:FQ:DG:AY:CF:ED:PV:FM:MI:PE:SN:TG:QR:HW:QP:CL:WK:NM:EF:PF:NR:GM:GV:CI:RG:ET:NL:IT:ND:HK:PR:LQ:VM:KR:DF:AH:YE:QK:DD:DL:KA:WI:IK:ER:RY:RD:DS:GC:ES:YD:NW:IM:KV:LE:WH:QQ:TY:CA:GR:QV:DR:FT:FL:CM:NK:DE:GP:HY:NI:WY:CN:IR:TM:CH:YW:FF:DC:KL:EE:MD:YT:WR:FR:IV:SK:QM:RE:LK:TR:SH:LW:GS:WL:YV:WV:HM:EH:NQ:SL:KY:QT:YG:MV:IY:AW:AF:MH:QH:GF:AT:PY:PT:PI:PN:YY:TA:CK:MA:YI:MG:VK:WS:GH:SV:YH:AE:NA:TF:GQ:MC:LY:IA:HT:LL:SS:HV:VE:FE:FI:TL:WC:CG:DM:RH:WD:WN:CC:MQ:FD:NC:DH:VL:KK:ME:IC:LI:WF:RK:GI:GY:IE:SM:DY:QG:YP:LA:MM:LH:WG:RP:TQ:RC:PS:QC:NV:EK:FG:KS:PD:KM:QD:EG:SY:KE:VH:PP:IP-IA:NC:NH:GN:DC:DV:MP:AN:QV:RH:EP:IT:SV:LH:SA:FG:QF:HY:PR:VR:QH:PC:YL:SI:PI:WL:NP:SQ:HP:DH:NG:VP:EC:NQ:KM:SG:FE:NE:CT:KA:WA:IH:FR:CS:YR:EY:GG:AC:RR:CI:DG:ML:DY:IC:DI:NK:RQ:SP:DR:MA:YE:GM:WY:AM:WK:EG:PH:AP:DK:PF:CG:LF:IF:FK:NT:ME:RC:YD:IV:TH:CH:TP:HR:FD:SR:CV:CF:GE:VF:VN:PN:QL:KK:GV:KS:TF:KQ:VT:VG:NL:GP:FL:HC:CP:IW:HQ:RN:LC:GA:QI:NF:VD:CY:SH:GQ:KG:TK:KR:EW:LS:QQ:AV:QG:SK:LA:WW:PM:FQ:HA:EK:EL:ND:QT:AF:CC:YC:HN:HW:QN:PW:VQ:GL:RL:WV:HE:EF:NM:AA:WQ:ED:HD:MD:RK:WR:GC:TG:VE:MQ:DW:VK:QS:YQ:HL:VY:SD:LW:EQ:TE:RS:SC:MF:PV:HK:PP:IR:NY:CW:ST:MV:FW:LG:WS:DM:WE:QR:DL:MY:FN:LM:HT:LI:II:LD:RV:KL:EA:DF:EH:KC:RY:CK:CD:SM:KW:TC:KN:FV:KP:KT:SS:LT:QA:ET:FA:GT:SE:LK:TL:HS:DQ:EM:HH:NV:EN:AI:PS:YW:MG:IG:QD:QC:PD:VA:FS:DA:GD:WF:QE:PY:YP:YN:FY:IY:TY:TA:TR:PG:VW:WI:AQ:YS:WD:KI:SF:IQ:WG:MS:IN:DS:YV:VI:MI:TW:AK:EV:RF:IM:FP:TS:GR:TQ:CN:LV:WN:TV:KY:CA:SL:KV:GW:MW:AT:QM:YK:GK:TM:IP:GS:RD:YY:EI:SY:RT:VS:HI:PT:CE:SW:LE:YA:YT:DE:MM:AR:PQ:RP:VC:VV:AY:RM:YF:GI:FF:TD:WT:IL:LY:RE:AW:HM:TT:HF:EE:LR:GF:DP:AD:WH:KE:CM:CR:YI:WM:NW:CQ:MN:MR:KH:TN:GH:DT:AH:VH:YM:DD:WC:ER:IE:QK:FC-FN:PQ:IA:CD:HC:QA:PT:RR:YV:SQ:NK:LC:IM:FC:KY:FS:HW:QQ:AW:AV:DV:DT:HF:KA:GY:AQ:RK:SM:FR:RC:DD:YM:AR:IW:VM:WD:ID:AT:IS:LL:LF:WI:QR:LT:QC:CK:WV:SN:PS:QI:DP:GN:AI:QP:QE:IV:ET:GS:GE:HD:YE:WK:KV:KW:FM:CQ:SS:PD:TG:TF:RT:FF:QK:VP:KH:DN:HR:CN:NF:HE:HQ:WL:NT:WG:TM:GQ:QS:KK:TL:EC:PG:DK:IH:LP:IT:YG:TQ:WR:DW:NW:LD:VW:MT:DE:MQ:MK:QF:TV:AK:PN:HT:EL:FL:RQ:RW:EG:VH:FA:SG:SF:GD:SE:PK:NA:KI:QL:HV:DI:TN:VE:FT:DG:IL:TS:QG:VS:WT:VG:YD:SR:SD:RD:WM:TP:KE:VA:LR:DS:MV:QH:GM:IN:DA:KS:HG:FK:EA:VL:SP:NL:IK:NY:LM:RM:MN:FG:YI:YC:CG:DR:VD:FP:HS:YR:KG:KQ:PR:SV:QT:PM:TE:FE:FH:NV:ED:GW:II:QN:LY:YH:WE:NN:KL:RL:PW:VQ:RV:DL:ER:FQ:DF:WH:VY:VN:WY:AE:HM:MD:LA:AP:AF:NG:CL:LV:RS:YT:IC:KN:AC:AA:QY:ME:GF:CM:NR:WS:SK:RI:GV:WQ:AN:PE:GG:VT:KR:PV:CP:PA:TC:MI:EW:LW:CR:LG:LS:PC:CI:YN:NM:RH:TA:CY:MS:NQ:DC:QW:WF:ND:PI:FV:IE:AD:GC:ES:PY:TY:GI:LK:AG:YK:IQ:KT:SC:CT:HP:MW:IG:IP:GH:YY:EF:PP:AH:YQ:CH:MF:GL:MY:EH:TR:RY:HK:AS:SW:VR:YF:YS:SH:CW:NP:EK:PH:MH:DH:NC:TI:EM:FI:MC:MA:WP:VI:AY:TH:MG:RN:HA:HN:FD:FY:HH:SI:QD:ST:IY:MP:EN:NS:VK:WN:HI:IF:YA:NI:RG:RA:CC:VV:GP:EV:CE:NE:WW:VC:KM:RE:IR:KF:EQ:QV, num_identified_proteins=6750\n",
            "33.1%\n"
          ]
        }
      ],
      "source": [
        "# This takes ~1 hour to run\n",
        "for sample in range(NUM_SAMPLES_PER_CONDITION):\n",
        "  for num_dipeptides in RANGE_NUM_TARGETS_PER_BINDER:\n",
        "    for num_binder in RANGE_NUM_BINDERS_IN_SET:\n",
        "      binder_set = generate_binder_set(num_dipeptides=num_dipeptides, num_binder=num_binder)\n",
        "      num_identified_proteins = try_binder_set(binder_set)\n",
        "      new_tuple = (num_binder, num_dipeptides, sample, binder_set, num_identified_proteins)\n",
        "      results.append(new_tuple)\n",
        "      print(\"num_binder=%d, num_dipeptides=%d, sample=%d, binder_set=%s, num_identified_proteins=%d\" % new_tuple)\n",
        "      print(\"{:.1%}\".format(1. * num_identified_proteins / num_proteins_total))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IWznHzfgtN8V"
      },
      "outputs": [],
      "source": [
        "long_df = pd.DataFrame.from_records(results, columns=['num_binder', 'num_dipeptides', 'sample', 'binder_name', 'num_identified_proteins'])\n",
        "long_df['proteome_fraction_identified'] = 1. * long_df['num_identified_proteins'] / num_proteins_total"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145
        },
        "id": "Mf7Skq4mt8GT",
        "outputId": "4306830f-049f-4ac6-e9cf-3d8066697e27"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\u003cstyle  type=\"text/css\" \u003e\n",
              "#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col0,#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col0,#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col0{\n",
              "            background-color:  #fff7fb;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col1{\n",
              "            background-color:  #fcf4fa;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col2{\n",
              "            background-color:  #f1ebf4;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col3,#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col1{\n",
              "            background-color:  #dad9ea;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col2{\n",
              "            background-color:  #4295c3;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col3{\n",
              "            background-color:  #045e94;\n",
              "            color:  #f1f1f1;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col1{\n",
              "            background-color:  #93b5d6;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col2{\n",
              "            background-color:  #03476f;\n",
              "            color:  #f1f1f1;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col3{\n",
              "            background-color:  #023858;\n",
              "            color:  #f1f1f1;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col0{\n",
              "            background-color:  #fbf4f9;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col1{\n",
              "            background-color:  #d2d2e7;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col2{\n",
              "            background-color:  #c5cce3;\n",
              "            color:  #000000;\n",
              "        }#T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col3{\n",
              "            background-color:  #b0c2de;\n",
              "            color:  #000000;\n",
              "        }\u003c/style\u003e\u003ctable id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002\" \u003e\u003cthead\u003e    \u003ctr\u003e        \u003cth class=\"index_name level0\" \u003enum_binder\u003c/th\u003e        \u003cth class=\"col_heading level0 col0\" \u003e1\u003c/th\u003e        \u003cth class=\"col_heading level0 col1\" \u003e5\u003c/th\u003e        \u003cth class=\"col_heading level0 col2\" \u003e10\u003c/th\u003e        \u003cth class=\"col_heading level0 col3\" \u003e15\u003c/th\u003e    \u003c/tr\u003e    \u003ctr\u003e        \u003cth class=\"index_name level0\" \u003enum_dipeptides\u003c/th\u003e        \u003cth class=\"blank\" \u003e\u003c/th\u003e        \u003cth class=\"blank\" \u003e\u003c/th\u003e        \u003cth class=\"blank\" \u003e\u003c/th\u003e        \u003cth class=\"blank\" \u003e\u003c/th\u003e    \u003c/tr\u003e\u003c/thead\u003e\u003ctbody\u003e\n",
              "                \u003ctr\u003e\n",
              "                        \u003cth id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002level0_row0\" class=\"row_heading level0 row0\" \u003e1\u003c/th\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col0\" class=\"data row0 col0\" \u003e0%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col1\" class=\"data row0 col1\" \u003e2%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col2\" class=\"data row0 col2\" \u003e9%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row0_col3\" class=\"data row0 col3\" \u003e20%\u003c/td\u003e\n",
              "            \u003c/tr\u003e\n",
              "            \u003ctr\u003e\n",
              "                        \u003cth id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002level0_row1\" class=\"row_heading level0 row1\" \u003e4\u003c/th\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col0\" class=\"data row1 col0\" \u003e0%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col1\" class=\"data row1 col1\" \u003e19%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col2\" class=\"data row1 col2\" \u003e57%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row1_col3\" class=\"data row1 col3\" \u003e81%\u003c/td\u003e\n",
              "            \u003c/tr\u003e\n",
              "            \u003ctr\u003e\n",
              "                        \u003cth id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002level0_row2\" class=\"row_heading level0 row2\" \u003e8\u003c/th\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col0\" class=\"data row2 col0\" \u003e0%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col1\" class=\"data row2 col1\" \u003e41%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col2\" class=\"data row2 col2\" \u003e90%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row2_col3\" class=\"data row2 col3\" \u003e95%\u003c/td\u003e\n",
              "            \u003c/tr\u003e\n",
              "            \u003ctr\u003e\n",
              "                        \u003cth id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002level0_row3\" class=\"row_heading level0 row3\" \u003e350\u003c/th\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col0\" class=\"data row3 col0\" \u003e2%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col1\" class=\"data row3 col1\" \u003e23%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col2\" class=\"data row3 col2\" \u003e27%\u003c/td\u003e\n",
              "                        \u003ctd id=\"T_41d539ca_c7cb_11eb_a557_0242ac1c0002row3_col3\" class=\"data row3 col3\" \u003e33%\u003c/td\u003e\n",
              "            \u003c/tr\u003e\n",
              "    \u003c/tbody\u003e\u003c/table\u003e"
            ],
            "text/plain": [
              "\u003cpandas.io.formats.style.Styler at 0x7f00ab9f5f10\u003e"
            ]
          },
          "execution_count": 22,
          "metadata": {
            "tags": []
          },
          "output_type": "execute_result"
        }
      ],
      "source": [
        "pivoted = long_df.pivot_table(values='proteome_fraction_identified', index='num_dipeptides', columns='num_binder', aggfunc='median')\n",
        "pivoted.style.background_gradient(axis=None).format('{:.0%}')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "K_yMSKelXjCy"
      },
      "source": [
        "# Figure from the manuscript\n",
        "![figure.png]()"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [],
      "name": "simulation_of_barcode_reads_to_match_proteome.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
